1 | //===- TargetTransformInfo.h ------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This pass exposes codegen information to IR-level passes. Every |
10 | /// transformation that uses codegen information is broken into three parts: |
11 | /// 1. The IR-level analysis pass. |
12 | /// 2. The IR-level transformation interface which provides the needed |
13 | /// information. |
14 | /// 3. Codegen-level implementation which uses target-specific hooks. |
15 | /// |
16 | /// This file defines #2, which is the interface that IR-level transformations |
17 | /// use for querying the codegen. |
18 | /// |
19 | //===----------------------------------------------------------------------===// |
20 | |
21 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
22 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
23 | |
24 | #include "llvm/ADT/SmallBitVector.h" |
25 | #include "llvm/IR/FMF.h" |
26 | #include "llvm/IR/InstrTypes.h" |
27 | #include "llvm/IR/PassManager.h" |
28 | #include "llvm/Pass.h" |
29 | #include "llvm/Support/AtomicOrdering.h" |
30 | #include "llvm/Support/BranchProbability.h" |
31 | #include "llvm/Support/InstructionCost.h" |
32 | #include <functional> |
33 | #include <optional> |
34 | #include <utility> |
35 | |
36 | namespace llvm { |
37 | |
38 | namespace Intrinsic { |
39 | typedef unsigned ID; |
40 | } |
41 | |
42 | class AllocaInst; |
43 | class AssumptionCache; |
44 | class BlockFrequencyInfo; |
45 | class DominatorTree; |
46 | class BranchInst; |
47 | class CallBase; |
48 | class Function; |
49 | class GlobalValue; |
50 | class InstCombiner; |
class OptimizationRemarkEmitter;
52 | class InterleavedAccessInfo; |
53 | class IntrinsicInst; |
54 | class LoadInst; |
55 | class Loop; |
56 | class LoopInfo; |
57 | class LoopVectorizationLegality; |
58 | class ProfileSummaryInfo; |
59 | class RecurrenceDescriptor; |
60 | class SCEV; |
61 | class ScalarEvolution; |
62 | class StoreInst; |
63 | class SwitchInst; |
64 | class TargetLibraryInfo; |
65 | class Type; |
66 | class User; |
67 | class Value; |
68 | class VPIntrinsic; |
69 | struct KnownBits; |
70 | |
71 | /// Information about a load/store intrinsic defined by the target. |
72 | struct MemIntrinsicInfo { |
73 | /// This is the pointer that the intrinsic is loading from or storing to. |
74 | /// If this is non-null, then analysis/optimization passes can assume that |
75 | /// this intrinsic is functionally equivalent to a load/store from this |
76 | /// pointer. |
77 | Value *PtrVal = nullptr; |
78 | |
79 | // Ordering for atomic operations. |
80 | AtomicOrdering Ordering = AtomicOrdering::NotAtomic; |
81 | |
82 | // Same Id is set by the target for corresponding load/store intrinsics. |
83 | unsigned short MatchingId = 0; |
84 | |
85 | bool ReadMem = false; |
86 | bool WriteMem = false; |
87 | bool IsVolatile = false; |
88 | |
89 | bool isUnordered() const { |
90 | return (Ordering == AtomicOrdering::NotAtomic || |
91 | Ordering == AtomicOrdering::Unordered) && |
92 | !IsVolatile; |
93 | } |
94 | }; |
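// Illustrative sketch (not part of the interface): a pass such as EarlyCSE can
// treat a target load intrinsic like an ordinary load once the target has
// filled in this struct via TTI's getTgtMemIntrinsic hook. processAsLoad is a
// hypothetical helper standing in for the caller's own handling.
//
//   MemIntrinsicInfo Info;
//   if (TTI.getTgtMemIntrinsic(Intr, Info) && Info.ReadMem && !Info.WriteMem &&
//       Info.isUnordered())
//     processAsLoad(Intr, Info.PtrVal, Info.MatchingId);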
95 | |
96 | /// Attributes of a target dependent hardware loop. |
97 | struct HardwareLoopInfo { |
98 | HardwareLoopInfo() = delete; |
99 | HardwareLoopInfo(Loop *L); |
100 | Loop *L = nullptr; |
101 | BasicBlock *ExitBlock = nullptr; |
102 | BranchInst *ExitBranch = nullptr; |
103 | const SCEV *ExitCount = nullptr; |
104 | IntegerType *CountType = nullptr; |
105 | Value *LoopDecrement = nullptr; // Decrement the loop counter by this |
106 | // value in every iteration. |
107 | bool IsNestingLegal = false; // Can a hardware loop be a parent to |
108 | // another hardware loop? |
109 | bool CounterInReg = false; // Should loop counter be updated in |
110 | // the loop via a phi? |
111 | bool PerformEntryTest = false; // Generate the intrinsic which also performs |
112 | // icmp ne zero on the loop counter value and |
113 | // produces an i1 to guard the loop entry. |
114 | bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, |
115 | DominatorTree &DT, bool ForceNestedLoop = false, |
116 | bool ForceHardwareLoopPHI = false); |
117 | bool canAnalyze(LoopInfo &LI); |
118 | }; |
119 | |
120 | class IntrinsicCostAttributes { |
121 | const IntrinsicInst *II = nullptr; |
122 | Type *RetTy = nullptr; |
123 | Intrinsic::ID IID; |
124 | SmallVector<Type *, 4> ParamTys; |
125 | SmallVector<const Value *, 4> Arguments; |
126 | FastMathFlags FMF; |
127 | // If ScalarizationCost is UINT_MAX, the cost of scalarizing the |
128 | // arguments and the return value will be computed based on types. |
129 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); |
130 | |
131 | public: |
132 | IntrinsicCostAttributes( |
133 | Intrinsic::ID Id, const CallBase &CI, |
134 | InstructionCost ScalarCost = InstructionCost::getInvalid(), |
135 | bool TypeBasedOnly = false); |
136 | |
137 | IntrinsicCostAttributes( |
138 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, |
139 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, |
140 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
141 | |
142 | IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, |
143 | ArrayRef<const Value *> Args); |
144 | |
145 | IntrinsicCostAttributes( |
146 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, |
147 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), |
148 | const IntrinsicInst *I = nullptr, |
149 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
150 | |
151 | Intrinsic::ID getID() const { return IID; } |
152 | const IntrinsicInst *getInst() const { return II; } |
153 | Type *getReturnType() const { return RetTy; } |
154 | FastMathFlags getFlags() const { return FMF; } |
155 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } |
156 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } |
157 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } |
158 | |
159 | bool isTypeBasedOnly() const { |
160 | return Arguments.empty(); |
161 | } |
162 | |
163 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } |
164 | }; |
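// Illustrative sketch (not part of the interface): a vectorizer-style cost
// query built from these attributes, assuming the getIntrinsicInstrCost entry
// point declared further down in this interface. VecTy and I1Ty are
// placeholder types supplied by the caller.
//
//   IntrinsicCostAttributes Attrs(Intrinsic::ctlz, VecTy, {VecTy, I1Ty});
//   InstructionCost Cost =
//       TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_RecipThroughput);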
165 | |
166 | enum class TailFoldingStyle { |
167 | /// Don't use tail folding |
168 | None, |
169 | /// Use predicate only to mask operations on data in the loop. |
170 | /// When the VL is not known to be a power-of-2, this method requires a |
171 | /// runtime overflow check for the i + VL in the loop because it compares the |
172 | /// scalar induction variable against the tripcount rounded up by VL which may |
173 | /// overflow. When the VL is a power-of-2, both the increment and uprounded |
174 | /// tripcount will overflow to 0, which does not require a runtime check |
175 | /// since the loop is exited when the loop induction variable equals the |
176 | /// uprounded trip-count, which are both 0. |
177 | Data, |
178 | /// Same as Data, but avoids using the get.active.lane.mask intrinsic to |
179 | /// calculate the mask and instead implements this with a |
180 | /// splat/stepvector/cmp. |
181 | /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the |
182 | /// active.lane.mask intrinsic when it is not natively supported? |
183 | DataWithoutLaneMask, |
184 | /// Use predicate to control both data and control flow. |
185 | /// This method always requires a runtime overflow check for the i + VL |
/// increment inside the loop, because it uses the result directly in the
187 | /// active.lane.mask to calculate the mask for the next iteration. If the |
188 | /// increment overflows, the mask is no longer correct. |
189 | DataAndControlFlow, |
190 | /// Use predicate to control both data and control flow, but modify |
191 | /// the trip count so that a runtime overflow check can be avoided |
192 | /// and such that the scalar epilogue loop can always be removed. |
193 | DataAndControlFlowWithoutRuntimeCheck |
194 | }; |
195 | |
196 | struct TailFoldingInfo { |
197 | TargetLibraryInfo *TLI; |
198 | LoopVectorizationLegality *LVL; |
199 | InterleavedAccessInfo *IAI; |
200 | TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, |
201 | InterleavedAccessInfo *IAI) |
202 | : TLI(TLI), LVL(LVL), IAI(IAI) {} |
203 | }; |
204 | |
205 | class TargetTransformInfo; |
206 | typedef TargetTransformInfo TTI; |
207 | |
208 | /// This pass provides access to the codegen interfaces that are needed |
209 | /// for IR-level transformations. |
210 | class TargetTransformInfo { |
211 | public: |
212 | /// Construct a TTI object using a type implementing the \c Concept |
213 | /// API below. |
214 | /// |
215 | /// This is used by targets to construct a TTI wrapping their target-specific |
216 | /// implementation that encodes appropriate costs for their target. |
217 | template <typename T> TargetTransformInfo(T Impl); |
218 | |
219 | /// Construct a baseline TTI object using a minimal implementation of |
220 | /// the \c Concept API below. |
221 | /// |
222 | /// The TTI implementation will reflect the information in the DataLayout |
223 | /// provided if non-null. |
224 | explicit TargetTransformInfo(const DataLayout &DL); |
225 | |
226 | // Provide move semantics. |
227 | TargetTransformInfo(TargetTransformInfo &&Arg); |
228 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); |
229 | |
230 | // We need to define the destructor out-of-line to define our sub-classes |
231 | // out-of-line. |
232 | ~TargetTransformInfo(); |
233 | |
234 | /// Handle the invalidation of this information. |
235 | /// |
236 | /// When used as a result of \c TargetIRAnalysis this method will be called |
237 | /// when the function this was computed for changes. When it returns false, |
238 | /// the information is preserved across those changes. |
239 | bool invalidate(Function &, const PreservedAnalyses &, |
240 | FunctionAnalysisManager::Invalidator &) { |
241 | // FIXME: We should probably in some way ensure that the subtarget |
242 | // information for a function hasn't changed. |
243 | return false; |
244 | } |
245 | |
246 | /// \name Generic Target Information |
247 | /// @{ |
248 | |
249 | /// The kind of cost model. |
250 | /// |
251 | /// There are several different cost models that can be customized by the |
252 | /// target. The normalization of each cost model may be target specific. |
253 | /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as |
254 | /// those derived from MCSchedModel::LoopMicroOpBufferSize etc. |
255 | enum TargetCostKind { |
256 | TCK_RecipThroughput, ///< Reciprocal throughput. |
257 | TCK_Latency, ///< The latency of instruction. |
258 | TCK_CodeSize, ///< Instruction code size. |
259 | TCK_SizeAndLatency ///< The weighted sum of size and latency. |
260 | }; |
261 | |
262 | /// Underlying constants for 'cost' values in this interface. |
263 | /// |
264 | /// Many APIs in this interface return a cost. This enum defines the |
265 | /// fundamental values that should be used to interpret (and produce) those |
266 | /// costs. The costs are returned as an int rather than a member of this |
267 | /// enumeration because it is expected that the cost of one IR instruction |
268 | /// may have a multiplicative factor to it or otherwise won't fit directly |
269 | /// into the enum. Moreover, it is common to sum or average costs which works |
270 | /// better as simple integral values. Thus this enum only provides constants. |
271 | /// Also note that the returned costs are signed integers to make it natural |
272 | /// to add, subtract, and test with zero (a common boundary condition). It is |
273 | /// not expected that 2^32 is a realistic cost to be modeling at any point. |
274 | /// |
275 | /// Note that these costs should usually reflect the intersection of code-size |
276 | /// cost and execution cost. A free instruction is typically one that folds |
277 | /// into another instruction. For example, reg-to-reg moves can often be |
278 | /// skipped by renaming the registers in the CPU, but they still are encoded |
279 | /// and thus wouldn't be considered 'free' here. |
280 | enum TargetCostConstants { |
281 | TCC_Free = 0, ///< Expected to fold away in lowering. |
282 | TCC_Basic = 1, ///< The cost of a typical 'add' instruction. |
283 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. |
284 | }; |
285 | |
286 | /// Estimate the cost of a GEP operation when lowered. |
287 | /// |
288 | /// \p PointeeType is the source element type of the GEP. |
289 | /// \p Ptr is the base pointer operand. |
290 | /// \p Operands is the list of indices following the base pointer. |
291 | /// |
292 | /// \p AccessType is a hint as to what type of memory might be accessed by |
293 | /// users of the GEP. getGEPCost will use it to determine if the GEP can be |
294 | /// folded into the addressing mode of a load/store. If AccessType is null, |
295 | /// then the resulting target type based off of PointeeType will be used as an |
296 | /// approximation. |
297 | InstructionCost |
298 | getGEPCost(Type *PointeeType, const Value *Ptr, |
299 | ArrayRef<const Value *> Operands, Type *AccessType = nullptr, |
300 | TargetCostKind CostKind = TCK_SizeAndLatency) const; |
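// Illustrative sketch: querying the cost of an existing GEP with an
// access-type hint, where GEP and LoadTy are placeholders supplied by the
// caller.
//
//   SmallVector<const Value *, 4> Indices(GEP->idx_begin(), GEP->idx_end());
//   InstructionCost C =
//       TTI.getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(),
//                      Indices, LoadTy, TTI::TCK_SizeAndLatency);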
301 | |
302 | /// Describe known properties for a set of pointers. |
303 | struct PointersChainInfo { |
304 | /// All the GEPs in a set have same base address. |
305 | unsigned IsSameBaseAddress : 1; |
/// These properties are only valid if IsSameBaseAddress is set.
307 | /// True if all pointers are separated by a unit stride. |
308 | unsigned IsUnitStride : 1; |
/// True if the distance between any two neighbouring pointers is a known value.
310 | unsigned IsKnownStride : 1; |
311 | unsigned Reserved : 29; |
312 | |
313 | bool isSameBase() const { return IsSameBaseAddress; } |
314 | bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; } |
315 | bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; } |
316 | |
317 | static PointersChainInfo getUnitStride() { |
return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
        /*IsKnownStride=*/1, 0};
320 | } |
321 | static PointersChainInfo getKnownStride() { |
return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
        /*IsKnownStride=*/1, 0};
324 | } |
325 | static PointersChainInfo getUnknownStride() { |
return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
        /*IsKnownStride=*/0, 0};
328 | } |
329 | }; |
330 | static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?" ); |
331 | |
332 | /// Estimate the cost of a chain of pointers (typically pointer operands of a |
333 | /// chain of loads or stores within same block) operations set when lowered. |
334 | /// \p AccessTy is the type of the loads/stores that will ultimately use the |
335 | /// \p Ptrs. |
336 | InstructionCost |
337 | getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base, |
338 | const PointersChainInfo &Info, Type *AccessTy, |
TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
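// Illustrative sketch: costing the pointer operands of a block of consecutive
// loads, where Ptrs, Base and AccessTy have already been collected by the
// caller.
//
//   InstructionCost C = TTI.getPointersChainCost(
//       Ptrs, Base, TTI::PointersChainInfo::getUnitStride(), AccessTy);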
342 | |
343 | /// \returns A value by which our inlining threshold should be multiplied. |
344 | /// This is primarily used to bump up the inlining threshold wholesale on |
345 | /// targets where calls are unusually expensive. |
346 | /// |
347 | /// TODO: This is a rather blunt instrument. Perhaps altering the costs of |
348 | /// individual classes of instructions would be better. |
349 | unsigned getInliningThresholdMultiplier() const; |
350 | |
351 | unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const; |
352 | unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const; |
353 | |
354 | /// \returns A value to be added to the inlining threshold. |
355 | unsigned adjustInliningThreshold(const CallBase *CB) const; |
356 | |
357 | /// \returns The cost of having an Alloca in the caller if not inlined, to be |
358 | /// added to the threshold |
359 | unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const; |
360 | |
361 | /// \returns Vector bonus in percent. |
362 | /// |
363 | /// Vector bonuses: We want to more aggressively inline vector-dense kernels |
364 | /// and apply this bonus based on the percentage of vector instructions. A |
365 | /// bonus is applied if the vector instructions exceed 50% and half that |
/// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
367 | /// arbitrary and evolved over time by accident as much as because they are |
368 | /// principled bonuses. |
369 | /// FIXME: It would be nice to base the bonus values on something more |
/// scientific. A target may have no bonus on vector instructions.
371 | int getInlinerVectorBonusPercent() const; |
372 | |
373 | /// \return the expected cost of a memcpy, which could e.g. depend on the |
374 | /// source/destination type and alignment and the number of bytes copied. |
375 | InstructionCost getMemcpyCost(const Instruction *I) const; |
376 | |
377 | /// Returns the maximum memset / memcpy size in bytes that still makes it |
378 | /// profitable to inline the call. |
379 | uint64_t getMaxMemIntrinsicInlineSizeThreshold() const; |
380 | |
381 | /// \return The estimated number of case clusters when lowering \p 'SI'. |
382 | /// \p JTSize Set a jump table size only when \p SI is suitable for a jump |
383 | /// table. |
384 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
385 | unsigned &JTSize, |
386 | ProfileSummaryInfo *PSI, |
387 | BlockFrequencyInfo *BFI) const; |
388 | |
389 | /// Estimate the cost of a given IR user when lowered. |
390 | /// |
391 | /// This can estimate the cost of either a ConstantExpr or Instruction when |
392 | /// lowered. |
393 | /// |
394 | /// \p Operands is a list of operands which can be a result of transformations |
395 | /// of the current operands. The number of the operands on the list must equal |
396 | /// to the number of the current operands the IR user has. Their order on the |
397 | /// list must be the same as the order of the current operands the IR user |
398 | /// has. |
399 | /// |
400 | /// The returned cost is defined in terms of \c TargetCostConstants, see its |
401 | /// comments for a detailed explanation of the cost values. |
402 | InstructionCost getInstructionCost(const User *U, |
403 | ArrayRef<const Value *> Operands, |
404 | TargetCostKind CostKind) const; |
405 | |
406 | /// This is a helper function which calls the three-argument |
407 | /// getInstructionCost with \p Operands which are the current operands U has. |
408 | InstructionCost getInstructionCost(const User *U, |
409 | TargetCostKind CostKind) const { |
410 | SmallVector<const Value *, 4> Operands(U->operand_values()); |
411 | return getInstructionCost(U, Operands, CostKind); |
412 | } |
413 | |
414 | /// If a branch or a select condition is skewed in one direction by more than |
415 | /// this factor, it is very likely to be predicted correctly. |
416 | BranchProbability getPredictableBranchThreshold() const; |
417 | |
418 | /// Return true if branch divergence exists. |
419 | /// |
420 | /// Branch divergence has a significantly negative impact on GPU performance |
421 | /// when threads in the same wavefront take different paths due to conditional |
422 | /// branches. |
423 | /// |
424 | /// If \p F is passed, provides a context function. If \p F is known to only |
425 | /// execute in a single threaded environment, the target may choose to skip |
426 | /// uniformity analysis and assume all values are uniform. |
427 | bool hasBranchDivergence(const Function *F = nullptr) const; |
428 | |
429 | /// Returns whether V is a source of divergence. |
430 | /// |
431 | /// This function provides the target-dependent information for |
432 | /// the target-independent UniformityAnalysis. |
433 | bool isSourceOfDivergence(const Value *V) const; |
434 | |
435 | // Returns true for the target specific |
436 | // set of operations which produce uniform result |
437 | // even taking non-uniform arguments |
438 | bool isAlwaysUniform(const Value *V) const; |
439 | |
440 | /// Query the target whether the specified address space cast from FromAS to |
441 | /// ToAS is valid. |
442 | bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
443 | |
444 | /// Return false if a \p AS0 address cannot possibly alias a \p AS1 address. |
445 | bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const; |
446 | |
447 | /// Returns the address space ID for a target's 'flat' address space. Note |
448 | /// this is not necessarily the same as addrspace(0), which LLVM sometimes |
449 | /// refers to as the generic address space. The flat address space is a |
/// generic address space that can be used to access multiple segments of memory
451 | /// with different address spaces. Access of a memory location through a |
452 | /// pointer with this address space is expected to be legal but slower |
453 | /// compared to the same memory location accessed through a pointer with a |
454 | /// different address space. |
///
456 | /// This is for targets with different pointer representations which can |
457 | /// be converted with the addrspacecast instruction. If a pointer is converted |
458 | /// to this address space, optimizations should attempt to replace the access |
459 | /// with the source address space. |
460 | /// |
461 | /// \returns ~0u if the target does not have such a flat address space to |
462 | /// optimize away. |
463 | unsigned getFlatAddressSpace() const; |
464 | |
465 | /// Return any intrinsic address operand indexes which may be rewritten if |
466 | /// they use a flat address space pointer. |
467 | /// |
468 | /// \returns true if the intrinsic was handled. |
469 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
470 | Intrinsic::ID IID) const; |
471 | |
472 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
473 | |
474 | /// Return true if globals in this address space can have initializers other |
475 | /// than `undef`. |
476 | bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const; |
477 | |
478 | unsigned getAssumedAddrSpace(const Value *V) const; |
479 | |
480 | bool isSingleThreaded() const; |
481 | |
482 | std::pair<const Value *, unsigned> |
483 | getPredicatedAddrSpace(const Value *V) const; |
484 | |
485 | /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p |
486 | /// NewV, which has a different address space. This should happen for every |
487 | /// operand index that collectFlatAddressOperands returned for the intrinsic. |
488 | /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the |
489 | /// new value (which may be the original \p II with modified operands). |
490 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
491 | Value *NewV) const; |
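// Illustrative sketch of how an address-space inference pass might combine the
// two hooks above; replaceAndErase is a hypothetical helper for the caller's
// own rewriting logic.
//
//   SmallVector<int, 4> OpIndexes;
//   if (TTI.collectFlatAddressOperands(OpIndexes, II->getIntrinsicID()))
//     if (Value *NewV = TTI.rewriteIntrinsicWithAddressSpace(II, OldPtr, NewPtr))
//       replaceAndErase(II, NewV);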
492 | |
493 | /// Test whether calls to a function lower to actual program function |
494 | /// calls. |
495 | /// |
496 | /// The idea is to test whether the program is likely to require a 'call' |
497 | /// instruction or equivalent in order to call the given function. |
498 | /// |
/// FIXME: It's not clear that this is a good or useful query API. Clients
500 | /// should probably move to simpler cost metrics using the above. |
501 | /// Alternatively, we could split the cost interface into distinct code-size |
502 | /// and execution-speed costs. This would allow modelling the core of this |
503 | /// query more accurately as a call is a single small instruction, but |
504 | /// incurs significant execution cost. |
505 | bool isLoweredToCall(const Function *F) const; |
506 | |
507 | struct LSRCost { |
508 | /// TODO: Some of these could be merged. Also, a lexical ordering |
509 | /// isn't always optimal. |
510 | unsigned Insns; |
511 | unsigned NumRegs; |
512 | unsigned AddRecCost; |
513 | unsigned NumIVMuls; |
514 | unsigned NumBaseAdds; |
515 | unsigned ImmCost; |
516 | unsigned SetupCost; |
517 | unsigned ScaleCost; |
518 | }; |
519 | |
520 | /// Parameters that control the generic loop unrolling transformation. |
521 | struct UnrollingPreferences { |
522 | /// The cost threshold for the unrolled loop. Should be relative to the |
523 | /// getInstructionCost values returned by this API, and the expectation is |
524 | /// that the unrolled loop's instructions when run through that interface |
525 | /// should not exceed this cost. However, this is only an estimate. Also, |
526 | /// specific loops may be unrolled even with a cost above this threshold if |
527 | /// deemed profitable. Set this to UINT_MAX to disable the loop body cost |
528 | /// restriction. |
529 | unsigned Threshold; |
530 | /// If complete unrolling will reduce the cost of the loop, we will boost |
531 | /// the Threshold by a certain percent to allow more aggressive complete |
532 | /// unrolling. This value provides the maximum boost percentage that we |
533 | /// can apply to Threshold (The value should be no less than 100). |
534 | /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost, |
535 | /// MaxPercentThresholdBoost / 100) |
536 | /// E.g. if complete unrolling reduces the loop execution time by 50% |
537 | /// then we boost the threshold by the factor of 2x. If unrolling is not |
538 | /// expected to reduce the running time, then we do not increase the |
539 | /// threshold. |
540 | unsigned MaxPercentThresholdBoost; |
541 | /// The cost threshold for the unrolled loop when optimizing for size (set |
542 | /// to UINT_MAX to disable). |
543 | unsigned OptSizeThreshold; |
544 | /// The cost threshold for the unrolled loop, like Threshold, but used |
545 | /// for partial/runtime unrolling (set to UINT_MAX to disable). |
546 | unsigned PartialThreshold; |
547 | /// The cost threshold for the unrolled loop when optimizing for size, like |
548 | /// OptSizeThreshold, but used for partial/runtime unrolling (set to |
549 | /// UINT_MAX to disable). |
550 | unsigned PartialOptSizeThreshold; |
551 | /// A forced unrolling factor (the number of concatenated bodies of the |
552 | /// original loop in the unrolled loop body). When set to 0, the unrolling |
553 | /// transformation will select an unrolling factor based on the current cost |
554 | /// threshold and other factors. |
555 | unsigned Count; |
556 | /// Default unroll count for loops with run-time trip count. |
557 | unsigned DefaultUnrollRuntimeCount; |
558 | // Set the maximum unrolling factor. The unrolling factor may be selected |
559 | // using the appropriate cost threshold, but may not exceed this number |
560 | // (set to UINT_MAX to disable). This does not apply in cases where the |
561 | // loop is being fully unrolled. |
562 | unsigned MaxCount; |
563 | /// Set the maximum upper bound of trip count. Allowing the MaxUpperBound |
/// to be overridden by a target gives more flexibility on certain cases.
/// By default, MaxUpperBound uses UnrollMaxUpperBound, whose value is 8.
566 | unsigned MaxUpperBound; |
567 | /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but |
568 | /// applies even if full unrolling is selected. This allows a target to fall |
569 | /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount. |
570 | unsigned FullUnrollMaxCount; |
571 | // Represents number of instructions optimized when "back edge" |
572 | // becomes "fall through" in unrolled loop. |
573 | // For now we count a conditional branch on a backedge and a comparison |
574 | // feeding it. |
575 | unsigned BEInsns; |
576 | /// Allow partial unrolling (unrolling of loops to expand the size of the |
577 | /// loop body, not only to eliminate small constant-trip-count loops). |
578 | bool Partial; |
579 | /// Allow runtime unrolling (unrolling of loops to expand the size of the |
580 | /// loop body even when the number of loop iterations is not known at |
581 | /// compile time). |
582 | bool Runtime; |
583 | /// Allow generation of a loop remainder (extra iterations after unroll). |
584 | bool AllowRemainder; |
585 | /// Allow emitting expensive instructions (such as divisions) when computing |
586 | /// the trip count of a loop for runtime unrolling. |
587 | bool AllowExpensiveTripCount; |
588 | /// Apply loop unroll on any kind of loop |
589 | /// (mainly to loops that fail runtime unrolling). |
590 | bool Force; |
591 | /// Allow using trip count upper bound to unroll loops. |
592 | bool UpperBound; |
593 | /// Allow unrolling of all the iterations of the runtime loop remainder. |
594 | bool UnrollRemainder; |
595 | /// Allow unroll and jam. Used to enable unroll and jam for the target. |
596 | bool UnrollAndJam; |
597 | /// Threshold for unroll and jam, for inner loop size. The 'Threshold' |
598 | /// value above is used during unroll and jam for the outer loop size. |
599 | /// This value is used in the same manner to limit the size of the inner |
600 | /// loop. |
601 | unsigned UnrollAndJamInnerLoopThreshold; |
602 | /// Don't allow loop unrolling to simulate more than this number of |
603 | /// iterations when checking full unroll profitability |
604 | unsigned MaxIterationsCountToAnalyze; |
605 | /// Don't disable runtime unroll for the loops which were vectorized. |
606 | bool UnrollVectorizedLoop = false; |
607 | }; |
608 | |
609 | /// Get target-customized preferences for the generic loop unrolling |
610 | /// transformation. The caller will initialize UP with the current |
611 | /// target-independent defaults. |
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                             UnrollingPreferences &UP,
                             OptimizationRemarkEmitter *ORE) const;
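// Illustrative sketch: the unroller seeds UP with target-independent defaults
// and then lets the target adjust them (the numbers below are placeholders,
// not the actual defaults).
//
//   TTI::UnrollingPreferences UP;
//   UP.Threshold = 150;
//   UP.Partial = UP.Runtime = false;
//   TTI.getUnrollingPreferences(L, SE, UP, &ORE);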
615 | |
616 | /// Query the target whether it would be profitable to convert the given loop |
617 | /// into a hardware loop. |
618 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
619 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
620 | HardwareLoopInfo &HWLoopInfo) const; |
621 | |
/// Query the target whether it would be preferred to create a predicated
623 | /// vector loop, which can avoid the need to emit a scalar epilogue loop. |
624 | bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const; |
625 | |
626 | /// Query the target what the preferred style of tail folding is. |
627 | /// \param IVUpdateMayOverflow Tells whether it is known if the IV update |
628 | /// may (or will never) overflow for the suggested VF/UF in the given loop. |
629 | /// Targets can use this information to select a more optimal tail folding |
630 | /// style. The value conservatively defaults to true, such that no assumptions |
631 | /// are made on overflow. |
632 | TailFoldingStyle |
633 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const; |
634 | |
635 | // Parameters that control the loop peeling transformation |
636 | struct PeelingPreferences { |
/// A forced peeling factor (the number of bodies of the original loop
/// that should be peeled off before the loop body). When set to 0, a
/// peeling factor is selected based on profile information and other factors.
640 | unsigned PeelCount; |
641 | /// Allow peeling off loop iterations. |
642 | bool AllowPeeling; |
643 | /// Allow peeling off loop iterations for loop nests. |
644 | bool AllowLoopNestsPeeling; |
/// Allow peeling based on profile. Used to enable peeling off all
/// iterations based on the provided profile.
/// If the value is true the peeling cost model can decide to peel only
/// some iterations and in this case it will set this to false.
649 | bool PeelProfiledIterations; |
650 | }; |
651 | |
652 | /// Get target-customized preferences for the generic loop peeling |
653 | /// transformation. The caller will initialize \p PP with the current |
654 | /// target-independent defaults with information from \p L and \p SE. |
655 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
656 | PeelingPreferences &PP) const; |
657 | |
658 | /// Targets can implement their own combinations for target-specific |
659 | /// intrinsics. This function will be called from the InstCombine pass every |
660 | /// time a target-specific intrinsic is encountered. |
661 | /// |
662 | /// \returns std::nullopt to not do anything target specific or a value that |
/// will be returned from the InstCombiner. It is also possible to return
/// nullptr to stop further processing of the intrinsic.
std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                  IntrinsicInst &II) const;
667 | /// Can be used to implement target-specific instruction combining. |
668 | /// \see instCombineIntrinsic |
std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
    KnownBits &Known, bool &KnownBitsComputed) const;
672 | /// Can be used to implement target-specific instruction combining. |
673 | /// \see instCombineIntrinsic |
std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) const;
679 | /// @} |
680 | |
681 | /// \name Scalar Target Information |
682 | /// @{ |
683 | |
684 | /// Flags indicating the kind of support for population count. |
685 | /// |
686 | /// Compared to the SW implementation, HW support is supposed to |
687 | /// significantly boost the performance when the population is dense, and it |
688 | /// may or may not degrade performance if the population is sparse. A HW |
689 | /// support is considered as "Fast" if it can outperform, or is on a par |
690 | /// with, SW implementation when the population is sparse; otherwise, it is |
691 | /// considered as "Slow". |
692 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; |
693 | |
694 | /// Return true if the specified immediate is legal add immediate, that |
695 | /// is the target has add instructions which can add a register with the |
696 | /// immediate without having to materialize the immediate into a register. |
697 | bool isLegalAddImmediate(int64_t Imm) const; |
698 | |
699 | /// Return true if the specified immediate is legal icmp immediate, |
700 | /// that is the target has icmp instructions which can compare a register |
701 | /// against the immediate without having to materialize the immediate into a |
702 | /// register. |
703 | bool isLegalICmpImmediate(int64_t Imm) const; |
704 | |
705 | /// Return true if the addressing mode represented by AM is legal for |
706 | /// this target, for a load/store of the specified type. |
707 | /// The type may be VoidTy, in which case only return true if the addressing |
708 | /// mode is legal for a load/store of any legal type. |
709 | /// If target returns true in LSRWithInstrQueries(), I may be valid. |
710 | /// TODO: Handle pre/postinc as well. |
711 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
712 | bool HasBaseReg, int64_t Scale, |
713 | unsigned AddrSpace = 0, |
714 | Instruction *I = nullptr) const; |
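// Informally, the addressing mode being queried here is
//   BaseGV + BaseOffset + (HasBaseReg ? BaseReg : 0) + Scale * ScaleReg
// mirroring the AddrMode structure used by the backend's legality checks.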
715 | |
716 | /// Return true if LSR cost of C1 is lower than C2. |
717 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
718 | const TargetTransformInfo::LSRCost &C2) const; |
719 | |
720 | /// Return true if LSR major cost is number of registers. Targets which |
721 | /// implement their own isLSRCostLess and unset number of registers as major |
722 | /// cost should return false, otherwise return true. |
723 | bool isNumRegsMajorCostOfLSR() const; |
724 | |
/// Return true if LSR should attempt to replace a use of an otherwise dead
726 | /// primary IV in the latch condition with another IV available in the loop. |
727 | /// When successful, makes the primary IV dead. |
728 | bool shouldFoldTerminatingConditionAfterLSR() const; |
729 | |
730 | /// \returns true if LSR should not optimize a chain that includes \p I. |
731 | bool isProfitableLSRChainElement(Instruction *I) const; |
732 | |
733 | /// Return true if the target can fuse a compare and branch. |
734 | /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost |
735 | /// calculation for the instructions in a loop. |
736 | bool canMacroFuseCmp() const; |
737 | |
738 | /// Return true if the target can save a compare for loop count, for example |
739 | /// hardware loop saves a compare. |
740 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
741 | DominatorTree *DT, AssumptionCache *AC, |
742 | TargetLibraryInfo *LibInfo) const; |
743 | |
744 | enum AddressingModeKind { |
745 | AMK_PreIndexed, |
746 | AMK_PostIndexed, |
747 | AMK_None |
748 | }; |
749 | |
750 | /// Return the preferred addressing mode LSR should make efforts to generate. |
751 | AddressingModeKind getPreferredAddressingMode(const Loop *L, |
752 | ScalarEvolution *SE) const; |
753 | |
754 | /// Return true if the target supports masked store. |
755 | bool isLegalMaskedStore(Type *DataType, Align Alignment) const; |
756 | /// Return true if the target supports masked load. |
757 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) const; |
758 | |
759 | /// Return true if the target supports nontemporal store. |
760 | bool isLegalNTStore(Type *DataType, Align Alignment) const; |
761 | /// Return true if the target supports nontemporal load. |
762 | bool isLegalNTLoad(Type *DataType, Align Alignment) const; |
763 | |
/// \returns true if the target supports broadcasting a load to a vector of
765 | /// type <NumElements x ElementTy>. |
766 | bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const; |
767 | |
768 | /// Return true if the target supports masked scatter. |
769 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; |
770 | /// Return true if the target supports masked gather. |
771 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const; |
772 | /// Return true if the target forces scalarizing of llvm.masked.gather |
773 | /// intrinsics. |
774 | bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const; |
775 | /// Return true if the target forces scalarizing of llvm.masked.scatter |
776 | /// intrinsics. |
777 | bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const; |
778 | |
779 | /// Return true if the target supports masked compress store. |
780 | bool isLegalMaskedCompressStore(Type *DataType) const; |
781 | /// Return true if the target supports masked expand load. |
782 | bool isLegalMaskedExpandLoad(Type *DataType) const; |
783 | |
784 | /// Return true if the target supports strided load. |
785 | bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const; |
786 | |
787 | /// Return true if this is an alternating opcode pattern that can be lowered |
788 | /// to a single instruction on the target. In X86 this is for the addsub |
/// instruction which corresponds to a Shuffle + FAdd + FSub pattern in IR.
/// This function expects two opcodes: \p Opcode0 and \p Opcode1 being
/// selected by \p OpcodeMask. The mask contains one bit per lane and is a `0`
/// when \p Opcode0 is selected and a `1` when \p Opcode1 is selected.
793 | /// \p VecTy is the vector type of the instruction to be generated. |
794 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
795 | const SmallBitVector &OpcodeMask) const; |
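// Illustrative sketch: a 4-lane alternating pattern where even lanes use
// Opcode0 (FAdd) and odd lanes use Opcode1 (FSub), as in an addsub-style
// shuffle; VecTy is a placeholder vector type supplied by the caller.
//
//   SmallBitVector OpcodeMask(4); // one bit per lane, 0 selects Opcode0
//   OpcodeMask.set(1);
//   OpcodeMask.set(3);
//   bool Legal = TTI.isLegalAltInstr(VecTy, Instruction::FAdd,
//                                    Instruction::FSub, OpcodeMask);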
796 | |
797 | /// Return true if we should be enabling ordered reductions for the target. |
798 | bool enableOrderedReductions() const; |
799 | |
800 | /// Return true if the target has a unified operation to calculate division |
801 | /// and remainder. If so, the additional implicit multiplication and |
802 | /// subtraction required to calculate a remainder from division are free. This |
803 | /// can enable more aggressive transformations for division and remainder than |
804 | /// would typically be allowed using throughput or size cost models. |
805 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; |
806 | |
807 | /// Return true if the given instruction (assumed to be a memory access |
808 | /// instruction) has a volatile variant. If that's the case then we can avoid |
809 | /// addrspacecast to generic AS for volatile loads/stores. Default |
810 | /// implementation returns false, which prevents address space inference for |
811 | /// volatile loads/stores. |
812 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; |
813 | |
814 | /// Return true if target doesn't mind addresses in vectors. |
815 | bool prefersVectorizedAddressing() const; |
816 | |
817 | /// Return the cost of the scaling factor used in the addressing |
818 | /// mode represented by AM for this target, for a load/store |
819 | /// of the specified type. |
820 | /// If the AM is supported, the return value must be >= 0. |
821 | /// If the AM is not supported, it returns a negative value. |
822 | /// TODO: Handle pre/postinc as well. |
823 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
824 | int64_t BaseOffset, bool HasBaseReg, |
825 | int64_t Scale, |
826 | unsigned AddrSpace = 0) const; |
827 | |
828 | /// Return true if the loop strength reduce pass should make |
829 | /// Instruction* based TTI queries to isLegalAddressingMode(). This is |
830 | /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned |
831 | /// immediate offset and no index register. |
832 | bool LSRWithInstrQueries() const; |
833 | |
834 | /// Return true if it's free to truncate a value of type Ty1 to type |
835 | /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 |
836 | /// by referencing its sub-register AX. |
837 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; |
838 | |
839 | /// Return true if it is profitable to hoist instruction in the |
840 | /// then/else to before if. |
841 | bool isProfitableToHoist(Instruction *I) const; |
842 | |
843 | bool useAA() const; |
844 | |
845 | /// Return true if this type is legal. |
846 | bool isTypeLegal(Type *Ty) const; |
847 | |
848 | /// Returns the estimated number of registers required to represent \p Ty. |
849 | unsigned getRegUsageForType(Type *Ty) const; |
850 | |
851 | /// Return true if switches should be turned into lookup tables for the |
852 | /// target. |
853 | bool shouldBuildLookupTables() const; |
854 | |
855 | /// Return true if switches should be turned into lookup tables |
856 | /// containing this constant value for the target. |
857 | bool shouldBuildLookupTablesForConstant(Constant *C) const; |
858 | |
859 | /// Return true if lookup tables should be turned into relative lookup tables. |
860 | bool shouldBuildRelLookupTables() const; |
861 | |
/// Return true if the input function, which is cold at all call sites,
/// should use the coldcc calling convention.
864 | bool useColdCCForColdCall(Function &F) const; |
865 | |
866 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
867 | /// are set if the demanded result elements need to be inserted and/or |
868 | /// extracted from vectors. |
869 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
870 | const APInt &DemandedElts, |
bool Insert, bool Extract,
872 | TTI::TargetCostKind CostKind) const; |
873 | |
/// Estimate the overhead of scalarizing an instruction's unique
/// non-constant operands. The (potentially vector) types to use for each
/// argument are passed via Tys.
877 | InstructionCost |
878 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
879 | ArrayRef<Type *> Tys, |
880 | TTI::TargetCostKind CostKind) const; |
881 | |
882 | /// If target has efficient vector element load/store instructions, it can |
883 | /// return true here so that insertion/extraction costs are not added to |
884 | /// the scalarization cost of a load/store. |
885 | bool supportsEfficientVectorElementLoadStore() const; |
886 | |
887 | /// If the target supports tail calls. |
888 | bool supportsTailCalls() const; |
889 | |
890 | /// If target supports tail call on \p CB |
891 | bool supportsTailCallFor(const CallBase *CB) const; |
892 | |
893 | /// Don't restrict interleaved unrolling to small loops. |
894 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; |
895 | |
896 | /// Returns options for expansion of memcmp. IsZeroCmp is |
/// true if this is the expansion of memcmp(p1, p2, s) == 0.
898 | struct MemCmpExpansionOptions { |
899 | // Return true if memcmp expansion is enabled. |
900 | operator bool() const { return MaxNumLoads > 0; } |
901 | |
902 | // Maximum number of load operations. |
903 | unsigned MaxNumLoads = 0; |
904 | |
905 | // The list of available load sizes (in bytes), sorted in decreasing order. |
906 | SmallVector<unsigned, 8> LoadSizes; |
907 | |
908 | // For memcmp expansion when the memcmp result is only compared equal or |
909 | // not-equal to 0, allow up to this number of load pairs per block. As an |
910 | // example, this may allow 'memcmp(a, b, 3) == 0' in a single block: |
911 | // a0 = load2bytes &a[0] |
912 | // b0 = load2bytes &b[0] |
913 | // a2 = load1byte &a[2] |
914 | // b2 = load1byte &b[2] |
915 | // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0 |
916 | unsigned NumLoadsPerBlock = 1; |
917 | |
918 | // Set to true to allow overlapping loads. For example, 7-byte compares can |
919 | // be done with two 4-byte compares instead of 4+2+1-byte compares. This |
920 | // requires all loads in LoadSizes to be doable in an unaligned way. |
921 | bool AllowOverlappingLoads = false; |
922 | |
923 | // Sometimes, the amount of data that needs to be compared is smaller than |
924 | // the standard register size, but it cannot be loaded with just one load |
925 | // instruction. For example, if the size of the memory comparison is 6 |
926 | // bytes, we can handle it more efficiently by loading all 6 bytes in a |
927 | // single block and generating an 8-byte number, instead of generating two |
928 | // separate blocks with conditional jumps for 4 and 2 byte loads. This |
929 | // approach simplifies the process and produces the comparison result as |
930 | // normal. This array lists the allowed sizes of memcmp tails that can be |
931 | // merged into one block |
932 | SmallVector<unsigned, 4> AllowedTailExpansions; |
933 | }; |
934 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
935 | bool IsZeroCmp) const; |
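// Illustrative sketch of a target-side implementation enabling expansion; the
// numbers are placeholders rather than any particular target's tuning.
//
//   TTI::MemCmpExpansionOptions Options;
//   Options.MaxNumLoads = OptSize ? 2 : 4;
//   Options.LoadSizes = {8, 4, 2, 1}; // decreasing order
//   Options.AllowOverlappingLoads = true;
//   return Options;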
936 | |
/// Should the Select Optimization pass be enabled and run.
938 | bool enableSelectOptimize() const; |
939 | |
940 | /// Should the Select Optimization pass treat the given instruction like a |
941 | /// select, potentially converting it to a conditional branch. This can |
942 | /// include select-like instructions like or(zext(c), x) that can be converted |
943 | /// to selects. |
944 | bool shouldTreatInstructionLikeSelect(const Instruction *I) const; |
945 | |
946 | /// Enable matching of interleaved access groups. |
947 | bool enableInterleavedAccessVectorization() const; |
948 | |
949 | /// Enable matching of interleaved access groups that contain predicated |
950 | /// accesses or gaps and therefore vectorized using masked |
951 | /// vector loads/stores. |
952 | bool enableMaskedInterleavedAccessVectorization() const; |
953 | |
954 | /// Indicate that it is potentially unsafe to automatically vectorize |
/// floating-point operations because vector and scalar floating-point
/// semantics may differ. For example, ARM NEON v7 SIMD math
957 | /// does not support IEEE-754 denormal numbers, while depending on the |
958 | /// platform, scalar floating-point math does. |
959 | /// This applies to floating-point math operations and calls, not memory |
960 | /// operations, shuffles, or casts. |
961 | bool isFPVectorizationPotentiallyUnsafe() const; |
962 | |
963 | /// Determine if the target supports unaligned memory accesses. |
964 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
965 | unsigned AddressSpace = 0, |
966 | Align Alignment = Align(1), |
967 | unsigned *Fast = nullptr) const; |
968 | |
969 | /// Return hardware support for population count. |
970 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; |
971 | |
972 | /// Return true if the hardware has a fast square-root instruction. |
973 | bool haveFastSqrt(Type *Ty) const; |
974 | |
975 | /// Return true if the cost of the instruction is too high to speculatively |
976 | /// execute and should be kept behind a branch. |
977 | /// This normally just wraps around a getInstructionCost() call, but some |
978 | /// targets might report a low TCK_SizeAndLatency value that is incompatible |
979 | /// with the fixed TCC_Expensive value. |
980 | /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute(). |
981 | bool isExpensiveToSpeculativelyExecute(const Instruction *I) const; |
982 | |
983 | /// Return true if it is faster to check if a floating-point value is NaN |
984 | /// (or not-NaN) versus a comparison against a constant FP zero value. |
985 | /// Targets should override this if materializing a 0.0 for comparison is |
986 | /// generally as cheap as checking for ordered/unordered. |
987 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; |
988 | |
989 | /// Return the expected cost of supporting the floating point operation |
990 | /// of the specified type. |
991 | InstructionCost getFPOpCost(Type *Ty) const; |
992 | |
993 | /// Return the expected cost of materializing for the given integer |
994 | /// immediate of the specified type. |
995 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
996 | TargetCostKind CostKind) const; |
997 | |
998 | /// Return the expected cost of materialization for the given integer |
999 | /// immediate of the specified type for a given instruction. The cost can be |
1000 | /// zero if the immediate can be folded into the specified instruction. |
1001 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
1002 | const APInt &Imm, Type *Ty, |
1003 | TargetCostKind CostKind, |
1004 | Instruction *Inst = nullptr) const; |
1005 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
1006 | const APInt &Imm, Type *Ty, |
1007 | TargetCostKind CostKind) const; |
1008 | |
1009 | /// Return the expected cost for the given integer when optimising |
1010 | /// for size. This is different than the other integer immediate cost |
1011 | /// functions in that it is subtarget agnostic. This is useful when you e.g. |
1012 | /// target one ISA such as Aarch32 but smaller encodings could be possible |
1013 | /// with another such as Thumb. This return value is used as a penalty when |
1014 | /// the total costs for a constant is calculated (the bigger the cost, the |
1015 | /// more beneficial constant hoisting is). |
1016 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1017 | const APInt &Imm, Type *Ty) const; |
1018 | |
1019 | /// It can be advantageous to detach complex constants from their uses to make |
1020 | /// their generation cheaper. This hook allows targets to report when such |
/// transformations might negatively affect the code generation of the
1022 | /// underlying operation. The motivating example is divides whereby hoisting |
1023 | /// constants prevents the code generator's ability to transform them into |
1024 | /// combinations of simpler operations. |
1025 | bool preferToKeepConstantsAttached(const Instruction &Inst, |
1026 | const Function &Fn) const; |
1027 | |
1028 | /// @} |
1029 | |
1030 | /// \name Vector Target Information |
1031 | /// @{ |
1032 | |
1033 | /// The various kinds of shuffle patterns for vector queries. |
1034 | enum ShuffleKind { |
1035 | SK_Broadcast, ///< Broadcast element 0 to all other elements. |
1036 | SK_Reverse, ///< Reverse the order of the vector. |
1037 | SK_Select, ///< Selects elements from the corresponding lane of |
1038 | ///< either source operand. This is equivalent to a |
1039 | ///< vector select with a constant condition operand. |
1040 | SK_Transpose, ///< Transpose two vectors. |
1041 | SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset. |
SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
1043 | SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one |
1044 | ///< with any shuffle mask. |
1045 | SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any |
1046 | ///< shuffle mask. |
1047 | SK_Splice ///< Concatenates elements from the first input vector |
1048 | ///< with elements of the second input vector. Returning |
1049 | ///< a vector of the same type as the input vectors. |
1050 | ///< Index indicates start offset in first input vector. |
1051 | }; |
1052 | |
1053 | /// Additional information about an operand's possible values. |
1054 | enum OperandValueKind { |
1055 | OK_AnyValue, // Operand can have any value. |
1056 | OK_UniformValue, // Operand is uniform (splat of a value). |
1057 | OK_UniformConstantValue, // Operand is uniform constant. |
1058 | OK_NonUniformConstantValue // Operand is a non uniform constant value. |
1059 | }; |
1060 | |
1061 | /// Additional properties of an operand's values. |
1062 | enum OperandValueProperties { |
1063 | OP_None = 0, |
1064 | OP_PowerOf2 = 1, |
1065 | OP_NegatedPowerOf2 = 2, |
1066 | }; |
1067 | |
1068 | // Describe the values an operand can take. We're in the process |
1069 | // of migrating uses of OperandValueKind and OperandValueProperties |
1070 | // to use this class, and then will change the internal representation. |
1071 | struct OperandValueInfo { |
1072 | OperandValueKind Kind = OK_AnyValue; |
1073 | OperandValueProperties Properties = OP_None; |
1074 | |
1075 | bool isConstant() const { |
1076 | return Kind == OK_UniformConstantValue || Kind == OK_NonUniformConstantValue; |
1077 | } |
1078 | bool isUniform() const { |
1079 | return Kind == OK_UniformConstantValue || Kind == OK_UniformValue; |
1080 | } |
1081 | bool isPowerOf2() const { |
1082 | return Properties == OP_PowerOf2; |
1083 | } |
1084 | bool isNegatedPowerOf2() const { |
1085 | return Properties == OP_NegatedPowerOf2; |
1086 | } |
1087 | |
1088 | OperandValueInfo getNoProps() const { |
return {Kind, OP_None};
1090 | } |
1091 | }; |
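// Illustrative sketch: operand info feeding an arithmetic cost query, assuming
// the getOperandInfo and getArithmeticInstrCost entry points declared further
// down in this interface.
//
//   TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(I->getOperand(0));
//   TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(I->getOperand(1));
//   InstructionCost Cost = TTI.getArithmeticInstrCost(
//       I->getOpcode(), I->getType(), CostKind, Op1Info, Op2Info);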
1092 | |
1093 | /// \return the number of registers in the target-provided register class. |
1094 | unsigned getNumberOfRegisters(unsigned ClassID) const; |
1095 | |
1096 | /// \return the target-provided register class ID for the provided type, |
1097 | /// accounting for type promotion and other type-legalization techniques that |
1098 | /// the target might apply. However, it specifically does not account for the |
1099 | /// scalarization or splitting of vector types. Should a vector type require |
1100 | /// scalarization or splitting into multiple underlying vector registers, that |
1101 | /// type should be mapped to a register class containing no registers. |
1102 | /// Specifically, this is designed to provide a simple, high-level view of the |
1103 | /// register allocation later performed by the backend. These register classes |
1104 | /// don't necessarily map onto the register classes used by the backend. |
1105 | /// FIXME: It's not currently possible to determine how many registers |
1106 | /// are used by the provided type. |
1107 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; |
1108 | |
1109 | /// \return the target-provided register class name |
1110 | const char *getRegisterClassName(unsigned ClassID) const; |
1111 | |
1112 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; |
1113 | |
1114 | /// \return The width of the largest scalar or vector register type. |
1115 | TypeSize getRegisterBitWidth(RegisterKind K) const; |
1116 | |
1117 | /// \return The width of the smallest vector register type. |
1118 | unsigned getMinVectorRegisterBitWidth() const; |
1119 | |
1120 | /// \return The maximum value of vscale if the target specifies an |
1121 | /// architectural maximum vector length, and std::nullopt otherwise. |
1122 | std::optional<unsigned> getMaxVScale() const; |
1123 | |
1124 | /// \return the value of vscale to tune the cost model for. |
1125 | std::optional<unsigned> getVScaleForTuning() const; |
1126 | |
1127 | /// \return true if vscale is known to be a power of 2 |
1128 | bool isVScaleKnownToBeAPowerOfTwo() const; |
1129 | |
1130 | /// \return True if the vectorization factor should be chosen to |
1131 | /// make the vector of the smallest element type match the size of a |
1132 | /// vector register. For wider element types, this could result in |
1133 | /// creating vectors that span multiple vector registers. |
1134 | /// If false, the vectorization factor will be chosen based on the |
1135 | /// size of the widest element type. |
1136 | /// \p K Register Kind for vectorization. |
1137 | bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const; |
1138 | |
1139 | /// \return The minimum vectorization factor for types of given element |
1140 | /// bit width, or 0 if there is no minimum VF. The returned value only |
1141 | /// applies when shouldMaximizeVectorBandwidth returns true. |
1142 | /// If IsScalable is true, the returned ElementCount must be a scalable VF. |
1143 | ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; |
1144 | |
1145 | /// \return The maximum vectorization factor for types of given element |
1146 | /// bit width and opcode, or 0 if there is no maximum VF. |
1147 | /// Currently only used by the SLP vectorizer. |
1148 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; |
1149 | |
1150 | /// \return The minimum vectorization factor for the store instruction. Given |
1151 | /// the initial estimation of the minimum vector factor and store value type, |
  /// it tries to find the lowest possible VF that might still be profitable
  /// for vectorization.
1154 | /// \param VF Initial estimation of the minimum vector factor. |
1155 | /// \param ScalarMemTy Scalar memory type of the store operation. |
1156 | /// \param ScalarValTy Scalar type of the stored value. |
1157 | /// Currently only used by the SLP vectorizer. |
1158 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, |
1159 | Type *ScalarValTy) const; |
1160 | |
  /// \return True if \p I should be considered for address type promotion.
  /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
  /// profitable without finding other extensions fed by the same input.
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1166 | |
1167 | /// \return The size of a cache line in bytes. |
1168 | unsigned getCacheLineSize() const; |
1169 | |
1170 | /// The possible cache levels |
1171 | enum class CacheLevel { |
1172 | L1D, // The L1 data cache |
1173 | L2D, // The L2 data cache |
1174 | |
1175 | // We currently do not model L3 caches, as their sizes differ widely between |
1176 | // microarchitectures. Also, we currently do not have a use for L3 cache |
1177 | // size modeling yet. |
1178 | }; |
1179 | |
1180 | /// \return The size of the cache level in bytes, if available. |
1181 | std::optional<unsigned> getCacheSize(CacheLevel Level) const; |
1182 | |
1183 | /// \return The associativity of the cache level, if available. |
1184 | std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const; |
1185 | |
1186 | /// \return The minimum architectural page size for the target. |
1187 | std::optional<unsigned> getMinPageSize() const; |
1188 | |
1189 | /// \return How much before a load we should place the prefetch |
1190 | /// instruction. This is currently measured in number of |
1191 | /// instructions. |
1192 | unsigned getPrefetchDistance() const; |
1193 | |
1194 | /// Some HW prefetchers can handle accesses up to a certain constant stride. |
1195 | /// Sometimes prefetching is beneficial even below the HW prefetcher limit, |
1196 | /// and the arguments provided are meant to serve as a basis for deciding this |
1197 | /// for a particular loop. |
1198 | /// |
1199 | /// \param NumMemAccesses Number of memory accesses in the loop. |
1200 | /// \param NumStridedMemAccesses Number of the memory accesses that |
1201 | /// ScalarEvolution could find a known stride |
1202 | /// for. |
1203 | /// \param NumPrefetches Number of software prefetches that will be |
1204 | /// emitted as determined by the addresses |
1205 | /// involved and the cache line size. |
1206 | /// \param HasCall True if the loop contains a call. |
1207 | /// |
1208 | /// \return This is the minimum stride in bytes where it makes sense to start |
1209 | /// adding SW prefetches. The default is 1, i.e. prefetch with any |
1210 | /// stride. |
1211 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
1212 | unsigned NumStridedMemAccesses, |
1213 | unsigned NumPrefetches, bool HasCall) const; |
1214 | |
1215 | /// \return The maximum number of iterations to prefetch ahead. If |
1216 | /// the required number of iterations is more than this number, no |
1217 | /// prefetching is performed. |
1218 | unsigned getMaxPrefetchIterationsAhead() const; |
1219 | |
1220 | /// \return True if prefetching should also be done for writes. |
1221 | bool enableWritePrefetching() const; |
1222 | |
  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
1224 | bool shouldPrefetchAddressSpace(unsigned AS) const; |
1225 | |
1226 | /// \return The maximum interleave factor that any transform should try to |
1227 | /// perform for this target. This number depends on the level of parallelism |
1228 | /// and the number of execution units in the CPU. |
1229 | unsigned getMaxInterleaveFactor(ElementCount VF) const; |
1230 | |
1231 | /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2. |
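  /// For example, a splat vector of the constant 8 would typically be
  /// described as {OK_UniformConstantValue, OP_PowerOf2} (an illustrative
  /// expectation, not a guarantee of this interface).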
1232 | static OperandValueInfo getOperandInfo(const Value *V); |
1233 | |
1234 | /// This is an approximation of reciprocal throughput of a math/logic op. |
1235 | /// A higher cost indicates less expected throughput. |
1236 | /// From Agner Fog's guides, reciprocal throughput is "the average number of |
1237 | /// clock cycles per instruction when the instructions are not part of a |
1238 | /// limiting dependency chain." |
1239 | /// Therefore, costs should be scaled to account for multiple execution units |
1240 | /// on the target that can process this type of instruction. For example, if |
1241 | /// there are 5 scalar integer units and 2 vector integer units that can |
1242 | /// calculate an 'add' in a single cycle, this model should indicate that the |
1243 | /// cost of the vector add instruction is 2.5 times the cost of the scalar |
1244 | /// add instruction. |
1245 | /// \p Args is an optional argument which holds the instruction operands |
1246 | /// values so the TTI can analyze those values searching for special |
1247 | /// cases or optimizations based on those values. |
1248 | /// \p CxtI is the optional original context instruction, if one exists, to |
1249 | /// provide even more information. |
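  /// For example, a caller might query the reciprocal-throughput cost of a
  /// vector add whose second operand is a uniform power-of-two constant
  /// (an illustrative sketch; 'TTI', 'Ctx' and 'VecTy' are hypothetical
  /// locals):
  /// \code
  ///   auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  ///   InstructionCost Cost = TTI.getArithmeticInstrCost(
  ///       Instruction::Add, VecTy, TTI::TCK_RecipThroughput,
  ///       {TTI::OK_AnyValue, TTI::OP_None},
  ///       {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
  /// \endcode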
1250 | InstructionCost getArithmeticInstrCost( |
1251 | unsigned Opcode, Type *Ty, |
1252 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1255 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
1256 | const Instruction *CxtI = nullptr) const; |
1257 | |
1258 | /// Returns the cost estimation for alternating opcode pattern that can be |
1259 | /// lowered to a single instruction on the target. In X86 this is for the |
  /// addsub instruction which corresponds to a Shuffle + FAdd + FSub pattern
  /// in IR. This function expects two opcodes: \p Opcode0 and \p Opcode1 being
  /// selected by \p OpcodeMask. The mask contains one bit per lane and is a
  /// `0` when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
1264 | /// \p VecTy is the vector type of the instruction to be generated. |
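  /// For example, an fadd/fsub alternating pattern over four lanes could be
  /// costed as follows (an illustrative sketch; 'TTI' and 'VecTy' are
  /// hypothetical):
  /// \code
  ///   SmallBitVector OpcodeMask(4); // all-zero: every lane uses Opcode0
  ///   OpcodeMask.set(1);
  ///   OpcodeMask.set(3);            // lanes 1 and 3 use Opcode1 (FSub)
  ///   InstructionCost Cost = TTI.getAltInstrCost(
  ///       VecTy, Instruction::FAdd, Instruction::FSub, OpcodeMask);
  /// \endcode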
1265 | InstructionCost getAltInstrCost( |
1266 | VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
1267 | const SmallBitVector &OpcodeMask, |
1268 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1269 | |
1270 | /// \return The cost of a shuffle instruction of kind Kind and of type Tp. |
1271 | /// The exact mask may be passed as Mask, or else the array will be empty. |
1272 | /// The index and subtype parameters are used by the subvector insertion and |
1273 | /// extraction shuffle kinds to show the insert/extract point and the type of |
1274 | /// the subvector being inserted/extracted. The operands of the shuffle can be |
1275 | /// passed through \p Args, which helps improve the cost estimation in some |
1276 | /// cases, like in broadcast loads. |
1277 | /// NOTE: For subvector extractions Tp represents the source type. |
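  /// For example, the cost of splatting lane 0 across a vector can be queried
  /// as (an illustrative sketch; 'TTI' and 'VecTy' are hypothetical):
  /// \code
  ///   InstructionCost Cost = TTI.getShuffleCost(TTI::SK_Broadcast, VecTy);
  /// \endcode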
1278 | InstructionCost |
1279 | getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1280 | ArrayRef<int> Mask = std::nullopt, |
1281 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1282 | int Index = 0, VectorType *SubTp = nullptr, |
1283 | ArrayRef<const Value *> Args = std::nullopt) const; |
1284 | |
1285 | /// Represents a hint about the context in which a cast is used. |
1286 | /// |
1287 | /// For zext/sext, the context of the cast is the operand, which must be a |
  /// load of some kind. For trunc, the context of the cast is the single
1289 | /// user of the instruction, which must be a store of some kind. |
1290 | /// |
1291 | /// This enum allows the vectorizer to give getCastInstrCost an idea of the |
1292 | /// type of cast it's dealing with, as not every cast is equal. For instance, |
1293 | /// the zext of a load may be free, but the zext of an interleaving load can |
  /// be (very) expensive!
1295 | /// |
1296 | /// See \c getCastContextHint to compute a CastContextHint from a cast |
1297 | /// Instruction*. Callers can use it if they don't need to override the |
1298 | /// context and just want it to be calculated from the instruction. |
1299 | /// |
1300 | /// FIXME: This handles the types of load/store that the vectorizer can |
1301 | /// produce, which are the cases where the context instruction is most |
1302 | /// likely to be incorrect. There are other situations where that can happen |
1303 | /// too, which might be handled here but in the long run a more general |
  /// solution of costing multiple instructions at the same time may be better.
1305 | enum class CastContextHint : uint8_t { |
1306 | None, ///< The cast is not used with a load/store of any kind. |
1307 | Normal, ///< The cast is used with a normal load/store. |
1308 | Masked, ///< The cast is used with a masked load/store. |
1309 | GatherScatter, ///< The cast is used with a gather/scatter. |
1310 | Interleave, ///< The cast is used with an interleaved load/store. |
1311 | Reversed, ///< The cast is used with a reversed load/store. |
1312 | }; |
1313 | |
1314 | /// Calculates a CastContextHint from \p I. |
1315 | /// This should be used by callers of getCastInstrCost if they wish to |
1316 | /// determine the context from some instruction. |
1317 | /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr, |
1318 | /// or if it's another type of cast. |
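  /// A typical use (an illustrative sketch; 'ZExt', 'DstTy', 'SrcTy' and
  /// 'CostKind' are hypothetical):
  /// \code
  ///   TTI::CastContextHint CCH = TargetTransformInfo::getCastContextHint(ZExt);
  ///   InstructionCost Cost = TTI.getCastInstrCost(Instruction::ZExt, DstTy,
  ///                                               SrcTy, CCH, CostKind, ZExt);
  /// \endcode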
1319 | static CastContextHint getCastContextHint(const Instruction *I); |
1320 | |
1321 | /// \return The expected cost of cast instructions, such as bitcast, trunc, |
1322 | /// zext, etc. If there is an existing instruction that holds Opcode, it |
1323 | /// may be passed in the 'I' parameter. |
1324 | InstructionCost |
1325 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
1326 | TTI::CastContextHint CCH, |
1327 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1328 | const Instruction *I = nullptr) const; |
1329 | |
1330 | /// \return The expected cost of a sign- or zero-extended vector extract. Use |
1331 | /// Index = -1 to indicate that there is no information about the index value. |
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) const;
1335 | |
1336 | /// \return The expected cost of control-flow related instructions such as |
1337 | /// Phi, Ret, Br, Switch. |
1338 | InstructionCost |
1339 | getCFInstrCost(unsigned Opcode, |
1340 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1341 | const Instruction *I = nullptr) const; |
1342 | |
1343 | /// \returns The expected cost of compare and select instructions. If there |
1344 | /// is an existing instruction that holds Opcode, it may be passed in the |
1345 | /// 'I' parameter. The \p VecPred parameter can be used to indicate the select |
1346 | /// is using a compare with the specified predicate as condition. When vector |
1347 | /// types are passed, \p VecPred must be used for all lanes. |
1348 | InstructionCost |
1349 | getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
1350 | CmpInst::Predicate VecPred, |
1351 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1352 | const Instruction *I = nullptr) const; |
1353 | |
1354 | /// \return The expected cost of vector Insert and Extract. |
1355 | /// Use -1 to indicate that there is no information on the index value. |
1356 | /// This is used when the instruction is not available; a typical use |
1357 | /// case is to provision the cost of vectorization/scalarization in |
1358 | /// vectorizer passes. |
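  /// For example, the provisional cost of extracting lane 0 of a vector is
  /// (an illustrative sketch; 'TTI', 'VecTy' and 'CostKind' are hypothetical):
  /// \code
  ///   InstructionCost Cost = TTI.getVectorInstrCost(
  ///       Instruction::ExtractElement, VecTy, CostKind, /*Index=*/0);
  /// \endcode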
1359 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1360 | TTI::TargetCostKind CostKind, |
1361 | unsigned Index = -1, Value *Op0 = nullptr, |
1362 | Value *Op1 = nullptr) const; |
1363 | |
1364 | /// \return The expected cost of vector Insert and Extract. |
  /// This is used when the instruction is available, and the implementation
  /// asserts 'I' is not nullptr.
1367 | /// |
1368 | /// A typical suitable use case is cost estimation when vector instruction |
1369 | /// exists (e.g., from basic blocks during transformation). |
1370 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, |
1371 | TTI::TargetCostKind CostKind, |
1372 | unsigned Index = -1) const; |
1373 | |
1374 | /// \return The cost of replication shuffle of \p VF elements typed \p EltTy |
1375 | /// \p ReplicationFactor times. |
1376 | /// |
1377 | /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: |
1378 | /// <0,0,0,1,1,1,2,2,2,3,3,3> |
1379 | InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, |
1380 | int VF, |
1381 | const APInt &DemandedDstElts, |
1382 | TTI::TargetCostKind CostKind); |
1383 | |
1384 | /// \return The cost of Load and Store instructions. |
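  /// For example, the cost of a 16-byte-aligned vector load from address
  /// space 0 could be queried as (an illustrative sketch; 'TTI' and 'VecTy'
  /// are hypothetical):
  /// \code
  ///   InstructionCost Cost = TTI.getMemoryOpCost(
  ///       Instruction::Load, VecTy, Align(16), /*AddressSpace=*/0);
  /// \endcode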
1385 | InstructionCost |
1386 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1387 | unsigned AddressSpace, |
1388 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
                  OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1390 | const Instruction *I = nullptr) const; |
1391 | |
1392 | /// \return The cost of VP Load and Store instructions. |
1393 | InstructionCost |
1394 | getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1395 | unsigned AddressSpace, |
1396 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1397 | const Instruction *I = nullptr) const; |
1398 | |
1399 | /// \return The cost of masked Load and Store instructions. |
1400 | InstructionCost getMaskedMemoryOpCost( |
1401 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1402 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1403 | |
1404 | /// \return The cost of Gather or Scatter operation |
1405 | /// \p Opcode - is a type of memory access Load or Store |
1406 | /// \p DataTy - a vector type of the data to be loaded or stored |
1407 | /// \p Ptr - pointer [or vector of pointers] - address[es] in memory |
1408 | /// \p VariableMask - true when the memory access is predicated with a mask |
1409 | /// that is not a compile-time constant |
1410 | /// \p Alignment - alignment of single element |
1411 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1412 | /// load/store to transform or the call to the gather/scatter intrinsic |
1413 | InstructionCost getGatherScatterOpCost( |
1414 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1415 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1416 | const Instruction *I = nullptr) const; |
1417 | |
1418 | /// \return The cost of strided memory operations. |
1419 | /// \p Opcode - is a type of memory access Load or Store |
1420 | /// \p DataTy - a vector type of the data to be loaded or stored |
1421 | /// \p Ptr - pointer [or vector of pointers] - address[es] in memory |
1422 | /// \p VariableMask - true when the memory access is predicated with a mask |
1423 | /// that is not a compile-time constant |
1424 | /// \p Alignment - alignment of single element |
1425 | /// \p I - the optional original context instruction, if one exists, e.g. the |
1426 | /// load/store to transform or the call to the gather/scatter intrinsic |
1427 | InstructionCost getStridedMemoryOpCost( |
1428 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1429 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1430 | const Instruction *I = nullptr) const; |
1431 | |
1432 | /// \return The cost of the interleaved memory operation. |
1433 | /// \p Opcode is the memory operation code |
1434 | /// \p VecTy is the vector type of the interleaved access. |
1435 | /// \p Factor is the interleave factor |
1436 | /// \p Indices is the indices for interleaved load members (as interleaved |
1437 | /// load allows gaps) |
1438 | /// \p Alignment is the alignment of the memory operation |
1439 | /// \p AddressSpace is address space of the pointer. |
1440 | /// \p UseMaskForCond indicates if the memory access is predicated. |
1441 | /// \p UseMaskForGaps indicates if gaps should be masked. |
1442 | InstructionCost getInterleavedMemoryOpCost( |
1443 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1444 | Align Alignment, unsigned AddressSpace, |
1445 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1446 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; |
1447 | |
1448 | /// A helper function to determine the type of reduction algorithm used |
1449 | /// for a given \p Opcode and set of FastMathFlags \p FMF. |
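  /// For example, a floating-point reduction without the reassoc fast-math
  /// flag must stay ordered, while an integer reduction (no FMF) need not:
  /// \code
  ///   requiresOrderedReduction(FastMathFlags()); // true
  ///   requiresOrderedReduction(std::nullopt);    // false
  /// \endcode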
1450 | static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) { |
1451 | return FMF && !(*FMF).allowReassoc(); |
1452 | } |
1453 | |
1454 | /// Calculate the cost of vector reduction intrinsics. |
1455 | /// |
1456 | /// This is the cost of reducing the vector value of type \p Ty to a scalar |
1457 | /// value using the operation denoted by \p Opcode. The FastMathFlags |
1458 | /// parameter \p FMF indicates what type of reduction we are performing: |
1459 | /// 1. Tree-wise. This is the typical 'fast' reduction performed that |
1460 | /// involves successively splitting a vector into half and doing the |
1461 | /// operation on the pair of halves until you have a scalar value. For |
1462 | /// example: |
1463 | /// (v0, v1, v2, v3) |
1464 | /// ((v0+v2), (v1+v3), undef, undef) |
1465 | /// ((v0+v2+v1+v3), undef, undef, undef) |
1466 | /// This is the default behaviour for integer operations, whereas for |
1467 | /// floating point we only do this if \p FMF indicates that |
1468 | /// reassociation is allowed. |
1469 | /// 2. Ordered. For a vector with N elements this involves performing N |
1470 | /// operations in lane order, starting with an initial scalar value, i.e. |
1471 | /// result = InitVal + v0 |
1472 | /// result = result + v1 |
1473 | /// result = result + v2 |
1474 | /// result = result + v3 |
1475 | /// This is only the case for FP operations and when reassociation is not |
1476 | /// allowed. |
1477 | /// |
1478 | InstructionCost getArithmeticReductionCost( |
1479 | unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF, |
1480 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1481 | |
1482 | InstructionCost getMinMaxReductionCost( |
1483 | Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF = FastMathFlags(), |
1484 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1485 | |
1486 | /// Calculate the cost of an extended reduction pattern, similar to |
1487 | /// getArithmeticReductionCost of an Add reduction with multiply and optional |
  /// extensions. This is the cost of:
  /// ResTy vecreduce.add(mul (A, B)).
  /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1491 | InstructionCost getMulAccReductionCost( |
1492 | bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1493 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1494 | |
1495 | /// Calculate the cost of an extended reduction pattern, similar to |
1496 | /// getArithmeticReductionCost of a reduction with an extension. |
  /// This is the cost of:
1498 | /// ResTy vecreduce.opcode(ext(Ty A)). |
1499 | InstructionCost getExtendedReductionCost( |
1500 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1501 | FastMathFlags FMF, |
1502 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1503 | |
1504 | /// \returns The cost of Intrinsic instructions. Analyses the real arguments. |
1505 | /// Three cases are handled: 1. scalar instruction 2. vector instruction |
1506 | /// 3. scalar instruction which is to be vectorized. |
1507 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1508 | TTI::TargetCostKind CostKind) const; |
1509 | |
1510 | /// \returns The cost of Call instructions. |
1511 | InstructionCost getCallInstrCost( |
1512 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, |
1513 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; |
1514 | |
1515 | /// \returns The number of pieces into which the provided type must be |
1516 | /// split during legalization. Zero is returned when the answer is unknown. |
1517 | unsigned getNumberOfParts(Type *Tp) const; |
1518 | |
1519 | /// \returns The cost of the address computation. For most targets this can be |
1520 | /// merged into the instruction indexing mode. Some targets might want to |
1521 | /// distinguish between address computation for memory operations on vector |
1522 | /// types and scalar types. Such targets should override this function. |
  /// The 'SE' parameter holds a pointer to the scalar evolution object, which
  /// is used to get the step value of 'Ptr' in the case of a constant stride.
1525 | /// The 'Ptr' parameter holds SCEV of the access pointer. |
1526 | InstructionCost getAddressComputationCost(Type *Ty, |
1527 | ScalarEvolution *SE = nullptr, |
1528 | const SCEV *Ptr = nullptr) const; |
1529 | |
1530 | /// \returns The cost, if any, of keeping values of the given types alive |
1531 | /// over a callsite. |
1532 | /// |
1533 | /// Some types may require the use of register classes that do not have |
1534 | /// any callee-saved registers, so would require a spill and fill. |
1535 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; |
1536 | |
1537 | /// \returns True if the intrinsic is a supported memory intrinsic. Info |
  /// will contain additional information - whether the intrinsic may read or
  /// write memory, its volatility, and the pointer. Info is undefined
1540 | /// if false is returned. |
1541 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; |
1542 | |
1543 | /// \returns The maximum element size, in bytes, for an element |
1544 | /// unordered-atomic memory intrinsic. |
1545 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
1546 | |
1547 | /// \returns A value which is the result of the given memory intrinsic. New |
1548 | /// instructions may be created to extract the result from the given intrinsic |
1549 | /// memory operation. Returns nullptr if the target cannot create a result |
1550 | /// from the given intrinsic. |
1551 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1552 | Type *ExpectedType) const; |
1553 | |
1554 | /// \returns The type to use in a loop expansion of a memcpy call. |
1555 | Type *getMemcpyLoopLoweringType( |
1556 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, |
1557 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, |
1558 | std::optional<uint32_t> AtomicElementSize = std::nullopt) const; |
1559 | |
1560 | /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. |
1561 | /// \param RemainingBytes The number of bytes to copy. |
1562 | /// |
1563 | /// Calculates the operand types to use when copying \p RemainingBytes of |
1564 | /// memory, where source and destination alignments are \p SrcAlign and |
1565 | /// \p DestAlign respectively. |
1566 | void getMemcpyLoopResidualLoweringType( |
1567 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1568 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1569 | unsigned SrcAlign, unsigned DestAlign, |
1570 | std::optional<uint32_t> AtomicCpySize = std::nullopt) const; |
1571 | |
1572 | /// \returns True if the two functions have compatible attributes for inlining |
1573 | /// purposes. |
1574 | bool areInlineCompatible(const Function *Caller, |
1575 | const Function *Callee) const; |
1576 | |
1577 | /// Returns a penalty for invoking call \p Call in \p F. |
1578 | /// For example, if a function F calls a function G, which in turn calls |
1579 | /// function H, then getInlineCallPenalty(F, H()) would return the |
1580 | /// penalty of calling H from F, e.g. after inlining G into F. |
1581 | /// \p DefaultCallPenalty is passed to give a default penalty that |
1582 | /// the target can amend or override. |
1583 | unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, |
1584 | unsigned DefaultCallPenalty) const; |
1585 | |
  /// \returns True if the caller and callee agree on how \p Types will be
  /// passed to or returned from the callee.
  /// \param Types List of types to check.
1590 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
1591 | const ArrayRef<Type *> &Types) const; |
1592 | |
1593 | /// The type of load/store indexing. |
1594 | enum MemIndexedMode { |
1595 | MIM_Unindexed, ///< No indexing. |
1596 | MIM_PreInc, ///< Pre-incrementing. |
1597 | MIM_PreDec, ///< Pre-decrementing. |
1598 | MIM_PostInc, ///< Post-incrementing. |
1599 | MIM_PostDec ///< Post-decrementing. |
1600 | }; |
1601 | |
1602 | /// \returns True if the specified indexed load for the given type is legal. |
1603 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1604 | |
1605 | /// \returns True if the specified indexed store for the given type is legal. |
1606 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1607 | |
1608 | /// \returns The bitwidth of the largest vector type that should be used to |
1609 | /// load/store in the given address space. |
1610 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
1611 | |
1612 | /// \returns True if the load instruction is legal to vectorize. |
1613 | bool isLegalToVectorizeLoad(LoadInst *LI) const; |
1614 | |
1615 | /// \returns True if the store instruction is legal to vectorize. |
1616 | bool isLegalToVectorizeStore(StoreInst *SI) const; |
1617 | |
1618 | /// \returns True if it is legal to vectorize the given load chain. |
1619 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
1620 | unsigned AddrSpace) const; |
1621 | |
1622 | /// \returns True if it is legal to vectorize the given store chain. |
1623 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
1624 | unsigned AddrSpace) const; |
1625 | |
1626 | /// \returns True if it is legal to vectorize the given reduction kind. |
1627 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1628 | ElementCount VF) const; |
1629 | |
1630 | /// \returns True if the given type is supported for scalable vectors |
1631 | bool isElementTypeLegalForScalableVector(Type *Ty) const; |
1632 | |
1633 | /// \returns The new vector factor value if the target doesn't support \p |
1634 | /// SizeInBytes loads or has a better vector factor. |
1635 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1636 | unsigned ChainSizeInBytes, |
1637 | VectorType *VecTy) const; |
1638 | |
1639 | /// \returns The new vector factor value if the target doesn't support \p |
1640 | /// SizeInBytes stores or has a better vector factor. |
1641 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1642 | unsigned ChainSizeInBytes, |
1643 | VectorType *VecTy) const; |
1644 | |
1645 | /// Flags describing the kind of vector reduction. |
1646 | struct ReductionFlags { |
1647 | ReductionFlags() = default; |
    bool IsMaxOp =
        false; ///< If the op is a min/max kind, true if it's a max operation.
1650 | bool IsSigned = false; ///< Whether the operation is a signed int reduction. |
1651 | bool NoNaN = |
1652 | false; ///< If op is an fp min/max, whether NaNs may be present. |
1653 | }; |
1654 | |
1655 | /// \returns True if the target prefers reductions in loop. |
1656 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
1657 | ReductionFlags Flags) const; |
1658 | |
  /// \returns True if the target prefers the reduction select to be kept in
  /// the loop when tail folding, i.e.
1661 | /// loop: |
1662 | /// p = phi (0, s) |
1663 | /// a = add (p, x) |
1664 | /// s = select (mask, a, p) |
1665 | /// vecreduce.add(s) |
1666 | /// |
1667 | /// As opposed to the normal scheme of p = phi (0, a) which allows the select |
1668 | /// to be pulled out of the loop. If the select(.., add, ..) can be predicated |
1669 | /// by the target, this can lead to cleaner code generation. |
1670 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
1671 | ReductionFlags Flags) const; |
1672 | |
1673 | /// Return true if the loop vectorizer should consider vectorizing an |
1674 | /// otherwise scalar epilogue loop. |
1675 | bool preferEpilogueVectorization() const; |
1676 | |
1677 | /// \returns True if the target wants to expand the given reduction intrinsic |
1678 | /// into a shuffle sequence. |
1679 | bool shouldExpandReduction(const IntrinsicInst *II) const; |
1680 | |
1681 | /// \returns the size cost of rematerializing a GlobalValue address relative |
1682 | /// to a stack reload. |
1683 | unsigned getGISelRematGlobalCost() const; |
1684 | |
1685 | /// \returns the lower bound of a trip count to decide on vectorization |
1686 | /// while tail-folding. |
1687 | unsigned getMinTripCountTailFoldingThreshold() const; |
1688 | |
1689 | /// \returns True if the target supports scalable vectors. |
1690 | bool supportsScalableVectors() const; |
1691 | |
1692 | /// \return true when scalable vectorization is preferred. |
1693 | bool enableScalableVectorization() const; |
1694 | |
1695 | /// \name Vector Predication Information |
1696 | /// @{ |
  /// Whether the target supports the %evl parameter of VP intrinsics efficiently
1698 | /// in hardware, for the given opcode and type/alignment. (see LLVM Language |
1699 | /// Reference - "Vector Predication Intrinsics"). |
1700 | /// Use of %evl is discouraged when that is not the case. |
1701 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
1702 | Align Alignment) const; |
1703 | |
1704 | struct VPLegalization { |
1705 | enum VPTransform { |
1706 | // keep the predicating parameter |
1707 | Legal = 0, |
1708 | // where legal, discard the predicate parameter |
1709 | Discard = 1, |
1710 | // transform into something else that is also predicating |
1711 | Convert = 2 |
1712 | }; |
1713 | |
1714 | // How to transform the EVL parameter. |
1715 | // Legal: keep the EVL parameter as it is. |
1716 | // Discard: Ignore the EVL parameter where it is safe to do so. |
1717 | // Convert: Fold the EVL into the mask parameter. |
1718 | VPTransform EVLParamStrategy; |
1719 | |
1720 | // How to transform the operator. |
1721 | // Legal: The target supports this operator. |
1722 | // Convert: Convert this to a non-VP operation. |
1723 | // The 'Discard' strategy is invalid. |
1724 | VPTransform OpStrategy; |
1725 | |
1726 | bool shouldDoNothing() const { |
1727 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); |
1728 | } |
1729 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) |
1730 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} |
1731 | }; |
1732 | |
1733 | /// \returns How the target needs this vector-predicated operation to be |
1734 | /// transformed. |
1735 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; |
1736 | /// @} |
1737 | |
1738 | /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb |
1739 | /// state. |
1740 | /// |
1741 | /// Used by the LowerTypeTests pass, which constructs an IR inline assembler |
1742 | /// node containing a jump table in a format suitable for the target, so it |
1743 | /// needs to know what format of jump table it can legally use. |
1744 | /// |
1745 | /// For non-Arm targets, this function isn't used. It defaults to returning |
1746 | /// false, but it shouldn't matter what it returns anyway. |
1747 | bool hasArmWideBranch(bool Thumb) const; |
1748 | |
1749 | /// \return The maximum number of function arguments the target supports. |
1750 | unsigned getMaxNumArgs() const; |
1751 | |
1752 | /// @} |
1753 | |
1754 | private: |
1755 | /// The abstract base class used to type erase specific TTI |
1756 | /// implementations. |
1757 | class Concept; |
1758 | |
1759 | /// The template model for the base class which wraps a concrete |
1760 | /// implementation in a type erased interface. |
1761 | template <typename T> class Model; |
1762 | |
1763 | std::unique_ptr<Concept> TTIImpl; |
1764 | }; |
1765 | |
1766 | class TargetTransformInfo::Concept { |
1767 | public: |
1768 | virtual ~Concept() = 0; |
1769 | virtual const DataLayout &getDataLayout() const = 0; |
1770 | virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, |
1771 | ArrayRef<const Value *> Operands, |
1772 | Type *AccessType, |
1773 | TTI::TargetCostKind CostKind) = 0; |
1774 | virtual InstructionCost |
1775 | getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base, |
1776 | const TTI::PointersChainInfo &Info, Type *AccessTy, |
1777 | TTI::TargetCostKind CostKind) = 0; |
1778 | virtual unsigned getInliningThresholdMultiplier() const = 0; |
1779 | virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const = 0; |
1780 | virtual unsigned |
1781 | getInliningCostBenefitAnalysisProfitableMultiplier() const = 0; |
1782 | virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; |
1783 | virtual int getInlinerVectorBonusPercent() const = 0; |
1784 | virtual unsigned getCallerAllocaCost(const CallBase *CB, |
1785 | const AllocaInst *AI) const = 0; |
1786 | virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; |
1787 | virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const = 0; |
1788 | virtual unsigned |
1789 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, |
1790 | ProfileSummaryInfo *PSI, |
1791 | BlockFrequencyInfo *BFI) = 0; |
1792 | virtual InstructionCost getInstructionCost(const User *U, |
1793 | ArrayRef<const Value *> Operands, |
1794 | TargetCostKind CostKind) = 0; |
1795 | virtual BranchProbability getPredictableBranchThreshold() = 0; |
1796 | virtual bool hasBranchDivergence(const Function *F = nullptr) = 0; |
1797 | virtual bool isSourceOfDivergence(const Value *V) = 0; |
1798 | virtual bool isAlwaysUniform(const Value *V) = 0; |
1799 | virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; |
1800 | virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const = 0; |
1801 | virtual unsigned getFlatAddressSpace() = 0; |
1802 | virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
1803 | Intrinsic::ID IID) const = 0; |
1804 | virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; |
1805 | virtual bool |
1806 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const = 0; |
1807 | virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; |
1808 | virtual bool isSingleThreaded() const = 0; |
1809 | virtual std::pair<const Value *, unsigned> |
1810 | getPredicatedAddrSpace(const Value *V) const = 0; |
1811 | virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
1812 | Value *OldV, |
1813 | Value *NewV) const = 0; |
1814 | virtual bool isLoweredToCall(const Function *F) = 0; |
  virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
                                       UnrollingPreferences &UP,
                                       OptimizationRemarkEmitter *ORE) = 0;
1818 | virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
1819 | PeelingPreferences &PP) = 0; |
1820 | virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1821 | AssumptionCache &AC, |
1822 | TargetLibraryInfo *LibInfo, |
1823 | HardwareLoopInfo &HWLoopInfo) = 0; |
1824 | virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) = 0; |
1825 | virtual TailFoldingStyle |
1826 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0; |
1827 | virtual std::optional<Instruction *> instCombineIntrinsic( |
1828 | InstCombiner &IC, IntrinsicInst &II) = 0; |
1829 | virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic( |
1830 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, |
1831 | KnownBits & Known, bool &KnownBitsComputed) = 0; |
1832 | virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
1833 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, |
1834 | APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, |
1835 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
1836 | SimplifyAndSetOp) = 0; |
1837 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; |
1838 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; |
1839 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, |
1840 | int64_t BaseOffset, bool HasBaseReg, |
1841 | int64_t Scale, unsigned AddrSpace, |
1842 | Instruction *I) = 0; |
1843 | virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
1844 | const TargetTransformInfo::LSRCost &C2) = 0; |
1845 | virtual bool isNumRegsMajorCostOfLSR() = 0; |
1846 | virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0; |
1847 | virtual bool isProfitableLSRChainElement(Instruction *I) = 0; |
1848 | virtual bool canMacroFuseCmp() = 0; |
1849 | virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, |
1850 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, |
1851 | TargetLibraryInfo *LibInfo) = 0; |
1852 | virtual AddressingModeKind |
1853 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; |
1854 | virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; |
1855 | virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; |
1856 | virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; |
1857 | virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; |
1858 | virtual bool isLegalBroadcastLoad(Type *ElementTy, |
1859 | ElementCount NumElements) const = 0; |
1860 | virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; |
1861 | virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; |
1862 | virtual bool forceScalarizeMaskedGather(VectorType *DataType, |
1863 | Align Alignment) = 0; |
1864 | virtual bool forceScalarizeMaskedScatter(VectorType *DataType, |
1865 | Align Alignment) = 0; |
1866 | virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; |
1867 | virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; |
1868 | virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) = 0; |
1869 | virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, |
1870 | unsigned Opcode1, |
1871 | const SmallBitVector &OpcodeMask) const = 0; |
1872 | virtual bool enableOrderedReductions() = 0; |
1873 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; |
1874 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; |
1875 | virtual bool prefersVectorizedAddressing() = 0; |
1876 | virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1877 | int64_t BaseOffset, |
1878 | bool HasBaseReg, int64_t Scale, |
1879 | unsigned AddrSpace) = 0; |
1880 | virtual bool LSRWithInstrQueries() = 0; |
1881 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; |
1882 | virtual bool isProfitableToHoist(Instruction *I) = 0; |
1883 | virtual bool useAA() = 0; |
1884 | virtual bool isTypeLegal(Type *Ty) = 0; |
1885 | virtual unsigned getRegUsageForType(Type *Ty) = 0; |
1886 | virtual bool shouldBuildLookupTables() = 0; |
1887 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; |
1888 | virtual bool shouldBuildRelLookupTables() = 0; |
1889 | virtual bool useColdCCForColdCall(Function &F) = 0; |
1890 | virtual InstructionCost getScalarizationOverhead(VectorType *Ty, |
1891 | const APInt &DemandedElts, |
                                                   bool Insert, bool Extract,
1893 | TargetCostKind CostKind) = 0; |
1894 | virtual InstructionCost |
1895 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1896 | ArrayRef<Type *> Tys, |
1897 | TargetCostKind CostKind) = 0; |
1898 | virtual bool supportsEfficientVectorElementLoadStore() = 0; |
1899 | virtual bool supportsTailCalls() = 0; |
1900 | virtual bool supportsTailCallFor(const CallBase *CB) = 0; |
1901 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; |
1902 | virtual MemCmpExpansionOptions |
1903 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; |
1904 | virtual bool enableSelectOptimize() = 0; |
1905 | virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) = 0; |
1906 | virtual bool enableInterleavedAccessVectorization() = 0; |
1907 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; |
1908 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; |
1909 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1910 | unsigned BitWidth, |
1911 | unsigned AddressSpace, |
1912 | Align Alignment, |
1913 | unsigned *Fast) = 0; |
1914 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; |
1915 | virtual bool haveFastSqrt(Type *Ty) = 0; |
1916 | virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) = 0; |
1917 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; |
1918 | virtual InstructionCost getFPOpCost(Type *Ty) = 0; |
1919 | virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1920 | const APInt &Imm, Type *Ty) = 0; |
1921 | virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
1922 | TargetCostKind CostKind) = 0; |
1923 | virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
1924 | const APInt &Imm, Type *Ty, |
1925 | TargetCostKind CostKind, |
1926 | Instruction *Inst = nullptr) = 0; |
1927 | virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
1928 | const APInt &Imm, Type *Ty, |
1929 | TargetCostKind CostKind) = 0; |
1930 | virtual bool preferToKeepConstantsAttached(const Instruction &Inst, |
1931 | const Function &Fn) const = 0; |
1932 | virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; |
1933 | virtual unsigned getRegisterClassForType(bool Vector, |
1934 | Type *Ty = nullptr) const = 0; |
1935 | virtual const char *getRegisterClassName(unsigned ClassID) const = 0; |
1936 | virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; |
1937 | virtual unsigned getMinVectorRegisterBitWidth() const = 0; |
1938 | virtual std::optional<unsigned> getMaxVScale() const = 0; |
1939 | virtual std::optional<unsigned> getVScaleForTuning() const = 0; |
1940 | virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0; |
1941 | virtual bool |
1942 | shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const = 0; |
1943 | virtual ElementCount getMinimumVF(unsigned ElemWidth, |
1944 | bool IsScalable) const = 0; |
1945 | virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; |
1946 | virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, |
1947 | Type *ScalarValTy) const = 0; |
1948 | virtual bool shouldConsiderAddressTypePromotion( |
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1950 | virtual unsigned getCacheLineSize() const = 0; |
1951 | virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0; |
1952 | virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level) |
1953 | const = 0; |
1954 | virtual std::optional<unsigned> getMinPageSize() const = 0; |
1955 | |
1956 | /// \return How much before a load we should place the prefetch |
1957 | /// instruction. This is currently measured in number of |
1958 | /// instructions. |
1959 | virtual unsigned getPrefetchDistance() const = 0; |
1960 | |
1961 | /// \return Some HW prefetchers can handle accesses up to a certain |
1962 | /// constant stride. This is the minimum stride in bytes where it |
1963 | /// makes sense to start adding SW prefetches. The default is 1, |
1964 | /// i.e. prefetch with any stride. Sometimes prefetching is beneficial |
1965 | /// even below the HW prefetcher limit, and the arguments provided are |
1966 | /// meant to serve as a basis for deciding this for a particular loop. |
1967 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
1968 | unsigned NumStridedMemAccesses, |
1969 | unsigned NumPrefetches, |
1970 | bool HasCall) const = 0; |
1971 | |
1972 | /// \return The maximum number of iterations to prefetch ahead. If |
1973 | /// the required number of iterations is more than this number, no |
1974 | /// prefetching is performed. |
1975 | virtual unsigned getMaxPrefetchIterationsAhead() const = 0; |
1976 | |
1977 | /// \return True if prefetching should also be done for writes. |
1978 | virtual bool enableWritePrefetching() const = 0; |
1979 | |
  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
1981 | virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0; |
1982 | |
1983 | virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0; |
1984 | virtual InstructionCost getArithmeticInstrCost( |
1985 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
1986 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, |
1987 | ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; |
1988 | virtual InstructionCost getAltInstrCost( |
1989 | VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
1990 | const SmallBitVector &OpcodeMask, |
1991 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const = 0; |
1992 | |
1993 | virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1994 | ArrayRef<int> Mask, |
1995 | TTI::TargetCostKind CostKind, |
1996 | int Index, VectorType *SubTp, |
1997 | ArrayRef<const Value *> Args) = 0; |
1998 | virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, |
1999 | Type *Src, CastContextHint CCH, |
2000 | TTI::TargetCostKind CostKind, |
2001 | const Instruction *I) = 0; |
  virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                   VectorType *VecTy,
                                                   unsigned Index) = 0;
2005 | virtual InstructionCost getCFInstrCost(unsigned Opcode, |
2006 | TTI::TargetCostKind CostKind, |
2007 | const Instruction *I = nullptr) = 0; |
2008 | virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
2009 | Type *CondTy, |
2010 | CmpInst::Predicate VecPred, |
2011 | TTI::TargetCostKind CostKind, |
2012 | const Instruction *I) = 0; |
2013 | virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
2014 | TTI::TargetCostKind CostKind, |
2015 | unsigned Index, Value *Op0, |
2016 | Value *Op1) = 0; |
2017 | virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, |
2018 | TTI::TargetCostKind CostKind, |
2019 | unsigned Index) = 0; |
2020 | |
2021 | virtual InstructionCost |
2022 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, |
2023 | const APInt &DemandedDstElts, |
2024 | TTI::TargetCostKind CostKind) = 0; |
2025 | |
2026 | virtual InstructionCost |
2027 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2028 | unsigned AddressSpace, TTI::TargetCostKind CostKind, |
2029 | OperandValueInfo OpInfo, const Instruction *I) = 0; |
2030 | virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, |
2031 | Align Alignment, |
2032 | unsigned AddressSpace, |
2033 | TTI::TargetCostKind CostKind, |
2034 | const Instruction *I) = 0; |
2035 | virtual InstructionCost |
2036 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2037 | unsigned AddressSpace, |
2038 | TTI::TargetCostKind CostKind) = 0; |
2039 | virtual InstructionCost |
2040 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2041 | bool VariableMask, Align Alignment, |
2042 | TTI::TargetCostKind CostKind, |
2043 | const Instruction *I = nullptr) = 0; |
2044 | virtual InstructionCost |
2045 | getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2046 | bool VariableMask, Align Alignment, |
2047 | TTI::TargetCostKind CostKind, |
2048 | const Instruction *I = nullptr) = 0; |
2049 | |
2050 | virtual InstructionCost getInterleavedMemoryOpCost( |
2051 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
2052 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
2053 | bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; |
2054 | virtual InstructionCost |
2055 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
2056 | std::optional<FastMathFlags> FMF, |
2057 | TTI::TargetCostKind CostKind) = 0; |
2058 | virtual InstructionCost |
2059 | getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, |
2060 | TTI::TargetCostKind CostKind) = 0; |
2061 | virtual InstructionCost getExtendedReductionCost( |
2062 | unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
2063 | FastMathFlags FMF, |
2064 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; |
2065 | virtual InstructionCost getMulAccReductionCost( |
2066 | bool IsUnsigned, Type *ResTy, VectorType *Ty, |
2067 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; |
2068 | virtual InstructionCost |
2069 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2070 | TTI::TargetCostKind CostKind) = 0; |
2071 | virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
2072 | ArrayRef<Type *> Tys, |
2073 | TTI::TargetCostKind CostKind) = 0; |
2074 | virtual unsigned getNumberOfParts(Type *Tp) = 0; |
2075 | virtual InstructionCost |
2076 | getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; |
2077 | virtual InstructionCost |
2078 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; |
2079 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
2080 | MemIntrinsicInfo &Info) = 0; |
2081 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; |
2082 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
2083 | Type *ExpectedType) = 0; |
2084 | virtual Type *getMemcpyLoopLoweringType( |
2085 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, |
2086 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, |
2087 | std::optional<uint32_t> AtomicElementSize) const = 0; |
2088 | |
2089 | virtual void getMemcpyLoopResidualLoweringType( |
2090 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
2091 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2092 | unsigned SrcAlign, unsigned DestAlign, |
2093 | std::optional<uint32_t> AtomicCpySize) const = 0; |
2094 | virtual bool areInlineCompatible(const Function *Caller, |
2095 | const Function *Callee) const = 0; |
2096 | virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, |
2097 | unsigned DefaultCallPenalty) const = 0; |
2098 | virtual bool areTypesABICompatible(const Function *Caller, |
2099 | const Function *Callee, |
2100 | const ArrayRef<Type *> &Types) const = 0; |
2101 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
2102 | virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
2103 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; |
2104 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; |
2105 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; |
2106 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
2107 | Align Alignment, |
2108 | unsigned AddrSpace) const = 0; |
2109 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
2110 | Align Alignment, |
2111 | unsigned AddrSpace) const = 0; |
2112 | virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
2113 | ElementCount VF) const = 0; |
2114 | virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; |
2115 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
2116 | unsigned ChainSizeInBytes, |
2117 | VectorType *VecTy) const = 0; |
2118 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
2119 | unsigned ChainSizeInBytes, |
2120 | VectorType *VecTy) const = 0; |
2121 | virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
2122 | ReductionFlags) const = 0; |
2123 | virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
2124 | ReductionFlags) const = 0; |
2125 | virtual bool preferEpilogueVectorization() const = 0; |
2126 | |
2127 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; |
2128 | virtual unsigned getGISelRematGlobalCost() const = 0; |
2129 | virtual unsigned getMinTripCountTailFoldingThreshold() const = 0; |
2130 | virtual bool enableScalableVectorization() const = 0; |
2131 | virtual bool supportsScalableVectors() const = 0; |
2132 | virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
2133 | Align Alignment) const = 0; |
2134 | virtual VPLegalization |
2135 | getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; |
2136 | virtual bool hasArmWideBranch(bool Thumb) const = 0; |
2137 | virtual unsigned getMaxNumArgs() const = 0; |
2138 | }; |
2139 | |
2140 | template <typename T> |
2141 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { |
2142 | T Impl; |
2143 | |
2144 | public: |
2145 | Model(T Impl) : Impl(std::move(Impl)) {} |
2146 | ~Model() override = default; |
2147 | |
2148 | const DataLayout &getDataLayout() const override { |
2149 | return Impl.getDataLayout(); |
2150 | } |
2151 | |
2152 | InstructionCost |
2153 | getGEPCost(Type *PointeeType, const Value *Ptr, |
2154 | ArrayRef<const Value *> Operands, Type *AccessType, |
2155 | TargetTransformInfo::TargetCostKind CostKind) override { |
2156 | return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind); |
2157 | } |
2158 | InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, |
2159 | const Value *Base, |
2160 | const PointersChainInfo &Info, |
2161 | Type *AccessTy, |
2162 | TargetCostKind CostKind) override { |
2163 | return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind); |
2164 | } |
2165 | unsigned getInliningThresholdMultiplier() const override { |
2166 | return Impl.getInliningThresholdMultiplier(); |
2167 | } |
2168 | unsigned adjustInliningThreshold(const CallBase *CB) override { |
2169 | return Impl.adjustInliningThreshold(CB); |
2170 | } |
2171 | unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override { |
2172 | return Impl.getInliningCostBenefitAnalysisSavingsMultiplier(); |
2173 | } |
2174 | unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override { |
2175 | return Impl.getInliningCostBenefitAnalysisProfitableMultiplier(); |
2176 | } |
2177 | int getInlinerVectorBonusPercent() const override { |
2178 | return Impl.getInlinerVectorBonusPercent(); |
2179 | } |
2180 | unsigned getCallerAllocaCost(const CallBase *CB, |
2181 | const AllocaInst *AI) const override { |
2182 | return Impl.getCallerAllocaCost(CB, AI); |
2183 | } |
2184 | InstructionCost getMemcpyCost(const Instruction *I) override { |
2185 | return Impl.getMemcpyCost(I); |
2186 | } |
2187 | |
2188 | uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override { |
2189 | return Impl.getMaxMemIntrinsicInlineSizeThreshold(); |
2190 | } |
2191 | |
2192 | InstructionCost getInstructionCost(const User *U, |
2193 | ArrayRef<const Value *> Operands, |
2194 | TargetCostKind CostKind) override { |
2195 | return Impl.getInstructionCost(U, Operands, CostKind); |
2196 | } |
2197 | BranchProbability getPredictableBranchThreshold() override { |
2198 | return Impl.getPredictableBranchThreshold(); |
2199 | } |
2200 | bool hasBranchDivergence(const Function *F = nullptr) override { |
2201 | return Impl.hasBranchDivergence(F); |
2202 | } |
2203 | bool isSourceOfDivergence(const Value *V) override { |
2204 | return Impl.isSourceOfDivergence(V); |
2205 | } |
2206 | |
2207 | bool isAlwaysUniform(const Value *V) override { |
2208 | return Impl.isAlwaysUniform(V); |
2209 | } |
2210 | |
2211 | bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { |
2212 | return Impl.isValidAddrSpaceCast(FromAS, ToAS); |
2213 | } |
2214 | |
2215 | bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override { |
2216 | return Impl.addrspacesMayAlias(AS0, AS1); |
2217 | } |
2218 | |
2219 | unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } |
2220 | |
2221 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
2222 | Intrinsic::ID IID) const override { |
2223 | return Impl.collectFlatAddressOperands(OpIndexes, IID); |
2224 | } |
2225 | |
2226 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { |
2227 | return Impl.isNoopAddrSpaceCast(FromAS, ToAS); |
2228 | } |
2229 | |
2230 | bool |
2231 | canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override { |
2232 | return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS); |
2233 | } |
2234 | |
2235 | unsigned getAssumedAddrSpace(const Value *V) const override { |
2236 | return Impl.getAssumedAddrSpace(V); |
2237 | } |
2238 | |
2239 | bool isSingleThreaded() const override { return Impl.isSingleThreaded(); } |
2240 | |
2241 | std::pair<const Value *, unsigned> |
2242 | getPredicatedAddrSpace(const Value *V) const override { |
2243 | return Impl.getPredicatedAddrSpace(V); |
2244 | } |
2245 | |
2246 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
2247 | Value *NewV) const override { |
2248 | return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
2249 | } |
2250 | |
2251 | bool isLoweredToCall(const Function *F) override { |
2252 | return Impl.isLoweredToCall(F); |
2253 | } |
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
2257 | return Impl.getUnrollingPreferences(L, SE, UP, ORE); |
2258 | } |
2259 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
2260 | PeelingPreferences &PP) override { |
2261 | return Impl.getPeelingPreferences(L, SE, PP); |
2262 | } |
2263 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
2264 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
2265 | HardwareLoopInfo &HWLoopInfo) override { |
2266 | return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
2267 | } |
2268 | bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override { |
2269 | return Impl.preferPredicateOverEpilogue(TFI); |
2270 | } |
2271 | TailFoldingStyle |
2272 | getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override { |
2273 | return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow); |
2274 | } |
2275 | std::optional<Instruction *> |
2276 | instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override { |
2277 | return Impl.instCombineIntrinsic(IC, II); |
2278 | } |
2279 | std::optional<Value *> |
2280 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
2281 | APInt DemandedMask, KnownBits &Known, |
2282 | bool &KnownBitsComputed) override { |
2283 | return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, |
2284 | KnownBitsComputed); |
2285 | } |
2286 | std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
2287 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
2288 | APInt &UndefElts2, APInt &UndefElts3, |
2289 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
2290 | SimplifyAndSetOp) override { |
2291 | return Impl.simplifyDemandedVectorEltsIntrinsic( |
2292 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, |
2293 | SimplifyAndSetOp); |
2294 | } |
2295 | bool isLegalAddImmediate(int64_t Imm) override { |
2296 | return Impl.isLegalAddImmediate(Imm); |
2297 | } |
2298 | bool isLegalICmpImmediate(int64_t Imm) override { |
2299 | return Impl.isLegalICmpImmediate(Imm); |
2300 | } |
2301 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
2302 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace, |
2303 | Instruction *I) override { |
2304 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
2305 | AddrSpace, I); |
2306 | } |
2307 | bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1, |
2308 | const TargetTransformInfo::LSRCost &C2) override { |
2309 | return Impl.isLSRCostLess(C1, C2); |
2310 | } |
2311 | bool isNumRegsMajorCostOfLSR() override { |
2312 | return Impl.isNumRegsMajorCostOfLSR(); |
2313 | } |
2314 | bool shouldFoldTerminatingConditionAfterLSR() const override { |
2315 | return Impl.shouldFoldTerminatingConditionAfterLSR(); |
2316 | } |
2317 | bool isProfitableLSRChainElement(Instruction *I) override { |
2318 | return Impl.isProfitableLSRChainElement(I); |
2319 | } |
2320 | bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } |
2321 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
2322 | DominatorTree *DT, AssumptionCache *AC, |
2323 | TargetLibraryInfo *LibInfo) override { |
2324 | return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); |
2325 | } |
2326 | AddressingModeKind |
2327 | getPreferredAddressingMode(const Loop *L, |
2328 | ScalarEvolution *SE) const override { |
2329 | return Impl.getPreferredAddressingMode(L, SE); |
2330 | } |
2331 | bool isLegalMaskedStore(Type *DataType, Align Alignment) override { |
2332 | return Impl.isLegalMaskedStore(DataType, Alignment); |
2333 | } |
2334 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { |
2335 | return Impl.isLegalMaskedLoad(DataType, Alignment); |
2336 | } |
2337 | bool isLegalNTStore(Type *DataType, Align Alignment) override { |
2338 | return Impl.isLegalNTStore(DataType, Alignment); |
2339 | } |
2340 | bool isLegalNTLoad(Type *DataType, Align Alignment) override { |
2341 | return Impl.isLegalNTLoad(DataType, Alignment); |
2342 | } |
2343 | bool isLegalBroadcastLoad(Type *ElementTy, |
2344 | ElementCount NumElements) const override { |
2345 | return Impl.isLegalBroadcastLoad(ElementTy, NumElements); |
2346 | } |
2347 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { |
2348 | return Impl.isLegalMaskedScatter(DataType, Alignment); |
2349 | } |
2350 | bool isLegalMaskedGather(Type *DataType, Align Alignment) override { |
2351 | return Impl.isLegalMaskedGather(DataType, Alignment); |
2352 | } |
2353 | bool forceScalarizeMaskedGather(VectorType *DataType, |
2354 | Align Alignment) override { |
2355 | return Impl.forceScalarizeMaskedGather(DataType, Alignment); |
2356 | } |
2357 | bool forceScalarizeMaskedScatter(VectorType *DataType, |
2358 | Align Alignment) override { |
2359 | return Impl.forceScalarizeMaskedScatter(DataType, Alignment); |
2360 | } |
2361 | bool isLegalMaskedCompressStore(Type *DataType) override { |
2362 | return Impl.isLegalMaskedCompressStore(DataType); |
2363 | } |
2364 | bool isLegalMaskedExpandLoad(Type *DataType) override { |
2365 | return Impl.isLegalMaskedExpandLoad(DataType); |
2366 | } |
2367 | bool isLegalStridedLoadStore(Type *DataType, Align Alignment) override { |
2368 | return Impl.isLegalStridedLoadStore(DataType, Alignment); |
2369 | } |
2370 | bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, |
2371 | const SmallBitVector &OpcodeMask) const override { |
2372 | return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask); |
2373 | } |
2374 | bool enableOrderedReductions() override { |
2375 | return Impl.enableOrderedReductions(); |
2376 | } |
2377 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { |
2378 | return Impl.hasDivRemOp(DataType, IsSigned); |
2379 | } |
2380 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { |
2381 | return Impl.hasVolatileVariant(I, AddrSpace); |
2382 | } |
2383 | bool prefersVectorizedAddressing() override { |
2384 | return Impl.prefersVectorizedAddressing(); |
2385 | } |
2386 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
2387 | int64_t BaseOffset, bool HasBaseReg, |
2388 | int64_t Scale, |
2389 | unsigned AddrSpace) override { |
2390 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
2391 | AddrSpace); |
2392 | } |
2393 | bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); } |
2394 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { |
2395 | return Impl.isTruncateFree(Ty1, Ty2); |
2396 | } |
2397 | bool isProfitableToHoist(Instruction *I) override { |
2398 | return Impl.isProfitableToHoist(I); |
2399 | } |
2400 | bool useAA() override { return Impl.useAA(); } |
2401 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } |
2402 | unsigned getRegUsageForType(Type *Ty) override { |
2403 | return Impl.getRegUsageForType(Ty); |
2404 | } |
2405 | bool shouldBuildLookupTables() override { |
2406 | return Impl.shouldBuildLookupTables(); |
2407 | } |
2408 | bool shouldBuildLookupTablesForConstant(Constant *C) override { |
2409 | return Impl.shouldBuildLookupTablesForConstant(C); |
2410 | } |
2411 | bool shouldBuildRelLookupTables() override { |
2412 | return Impl.shouldBuildRelLookupTables(); |
2413 | } |
2414 | bool useColdCCForColdCall(Function &F) override { |
2415 | return Impl.useColdCCForColdCall(F); |
2416 | } |
2417 | |
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
2422 | return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract, |
2423 | CostKind); |
2424 | } |
2425 | InstructionCost |
2426 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
2427 | ArrayRef<Type *> Tys, |
2428 | TargetCostKind CostKind) override { |
2429 | return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind); |
2430 | } |
2431 | |
2432 | bool supportsEfficientVectorElementLoadStore() override { |
2433 | return Impl.supportsEfficientVectorElementLoadStore(); |
2434 | } |
2435 | |
2436 | bool supportsTailCalls() override { return Impl.supportsTailCalls(); } |
2437 | bool supportsTailCallFor(const CallBase *CB) override { |
2438 | return Impl.supportsTailCallFor(CB); |
2439 | } |
2440 | |
2441 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { |
2442 | return Impl.enableAggressiveInterleaving(LoopHasReductions); |
2443 | } |
2444 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
2445 | bool IsZeroCmp) const override { |
2446 | return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); |
2447 | } |
2448 | bool enableSelectOptimize() override { |
2449 | return Impl.enableSelectOptimize(); |
2450 | } |
2451 | bool shouldTreatInstructionLikeSelect(const Instruction *I) override { |
2452 | return Impl.shouldTreatInstructionLikeSelect(I); |
2453 | } |
2454 | bool enableInterleavedAccessVectorization() override { |
2455 | return Impl.enableInterleavedAccessVectorization(); |
2456 | } |
2457 | bool enableMaskedInterleavedAccessVectorization() override { |
2458 | return Impl.enableMaskedInterleavedAccessVectorization(); |
2459 | } |
2460 | bool isFPVectorizationPotentiallyUnsafe() override { |
2461 | return Impl.isFPVectorizationPotentiallyUnsafe(); |
2462 | } |
2463 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
2464 | unsigned AddressSpace, Align Alignment, |
2465 | unsigned *Fast) override { |
2466 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, |
2467 | Alignment, Fast); |
2468 | } |
2469 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { |
2470 | return Impl.getPopcntSupport(IntTyWidthInBit); |
2471 | } |
2472 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } |
2473 | |
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
2475 | return Impl.isExpensiveToSpeculativelyExecute(I); |
2476 | } |
2477 | |
2478 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { |
2479 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); |
2480 | } |
2481 | |
2482 | InstructionCost getFPOpCost(Type *Ty) override { |
2483 | return Impl.getFPOpCost(Ty); |
2484 | } |
2485 | |
2486 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
2487 | const APInt &Imm, Type *Ty) override { |
2488 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); |
2489 | } |
2490 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
2491 | TargetCostKind CostKind) override { |
2492 | return Impl.getIntImmCost(Imm, Ty, CostKind); |
2493 | } |
2494 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
2495 | const APInt &Imm, Type *Ty, |
2496 | TargetCostKind CostKind, |
2497 | Instruction *Inst = nullptr) override { |
2498 | return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); |
2499 | } |
2500 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
2501 | const APInt &Imm, Type *Ty, |
2502 | TargetCostKind CostKind) override { |
2503 | return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); |
2504 | } |
2505 | bool preferToKeepConstantsAttached(const Instruction &Inst, |
2506 | const Function &Fn) const override { |
2507 | return Impl.preferToKeepConstantsAttached(Inst, Fn); |
2508 | } |
2509 | unsigned getNumberOfRegisters(unsigned ClassID) const override { |
2510 | return Impl.getNumberOfRegisters(ClassID); |
2511 | } |
2512 | unsigned getRegisterClassForType(bool Vector, |
2513 | Type *Ty = nullptr) const override { |
2514 | return Impl.getRegisterClassForType(Vector, Ty); |
2515 | } |
2516 | const char *getRegisterClassName(unsigned ClassID) const override { |
2517 | return Impl.getRegisterClassName(ClassID); |
2518 | } |
2519 | TypeSize getRegisterBitWidth(RegisterKind K) const override { |
2520 | return Impl.getRegisterBitWidth(K); |
2521 | } |
2522 | unsigned getMinVectorRegisterBitWidth() const override { |
2523 | return Impl.getMinVectorRegisterBitWidth(); |
2524 | } |
2525 | std::optional<unsigned> getMaxVScale() const override { |
2526 | return Impl.getMaxVScale(); |
2527 | } |
2528 | std::optional<unsigned> getVScaleForTuning() const override { |
2529 | return Impl.getVScaleForTuning(); |
2530 | } |
2531 | bool isVScaleKnownToBeAPowerOfTwo() const override { |
2532 | return Impl.isVScaleKnownToBeAPowerOfTwo(); |
2533 | } |
2534 | bool shouldMaximizeVectorBandwidth( |
2535 | TargetTransformInfo::RegisterKind K) const override { |
2536 | return Impl.shouldMaximizeVectorBandwidth(K); |
2537 | } |
2538 | ElementCount getMinimumVF(unsigned ElemWidth, |
2539 | bool IsScalable) const override { |
2540 | return Impl.getMinimumVF(ElemWidth, IsScalable); |
2541 | } |
2542 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { |
2543 | return Impl.getMaximumVF(ElemWidth, Opcode); |
2544 | } |
2545 | unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, |
2546 | Type *ScalarValTy) const override { |
2547 | return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy); |
2548 | } |
2549 | bool shouldConsiderAddressTypePromotion( |
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2551 | return Impl.shouldConsiderAddressTypePromotion( |
2552 | I, AllowPromotionWithoutCommonHeader); |
2553 | } |
2554 | unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } |
2555 | std::optional<unsigned> getCacheSize(CacheLevel Level) const override { |
2556 | return Impl.getCacheSize(Level); |
2557 | } |
2558 | std::optional<unsigned> |
2559 | getCacheAssociativity(CacheLevel Level) const override { |
2560 | return Impl.getCacheAssociativity(Level); |
2561 | } |
2562 | |
2563 | std::optional<unsigned> getMinPageSize() const override { |
2564 | return Impl.getMinPageSize(); |
2565 | } |
2566 | |
2567 | /// Return the preferred prefetch distance in terms of instructions. |
2568 | /// |
2569 | unsigned getPrefetchDistance() const override { |
2570 | return Impl.getPrefetchDistance(); |
2571 | } |
2572 | |
2573 | /// Return the minimum stride necessary to trigger software |
2574 | /// prefetching. |
2575 | /// |
2576 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
2577 | unsigned NumStridedMemAccesses, |
2578 | unsigned NumPrefetches, |
2579 | bool HasCall) const override { |
2580 | return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, |
2581 | NumPrefetches, HasCall); |
2582 | } |
2583 | |
2584 | /// Return the maximum prefetch distance in terms of loop |
2585 | /// iterations. |
2586 | /// |
2587 | unsigned getMaxPrefetchIterationsAhead() const override { |
2588 | return Impl.getMaxPrefetchIterationsAhead(); |
2589 | } |
2590 | |
2591 | /// \return True if prefetching should also be done for writes. |
2592 | bool enableWritePrefetching() const override { |
2593 | return Impl.enableWritePrefetching(); |
2594 | } |
2595 | |
  /// \return True if the target wants to issue a prefetch in address space
  /// \p AS.
2597 | bool shouldPrefetchAddressSpace(unsigned AS) const override { |
2598 | return Impl.shouldPrefetchAddressSpace(AS); |
2599 | } |
2600 | |
2601 | unsigned getMaxInterleaveFactor(ElementCount VF) override { |
2602 | return Impl.getMaxInterleaveFactor(VF); |
2603 | } |
2604 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
2605 | unsigned &JTSize, |
2606 | ProfileSummaryInfo *PSI, |
2607 | BlockFrequencyInfo *BFI) override { |
2608 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); |
2609 | } |
2610 | InstructionCost getArithmeticInstrCost( |
2611 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
2612 | OperandValueInfo Opd1Info, OperandValueInfo Opd2Info, |
2613 | ArrayRef<const Value *> Args, |
2614 | const Instruction *CxtI = nullptr) override { |
2615 | return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, |
2616 | Args, CxtI); |
2617 | } |
2618 | InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, |
2619 | unsigned Opcode1, |
2620 | const SmallBitVector &OpcodeMask, |
2621 | TTI::TargetCostKind CostKind) const override { |
2622 | return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask, CostKind); |
2623 | } |
2624 | |
2625 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
2626 | ArrayRef<int> Mask, |
2627 | TTI::TargetCostKind CostKind, int Index, |
2628 | VectorType *SubTp, |
2629 | ArrayRef<const Value *> Args) override { |
2630 | return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args); |
2631 | } |
2632 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
2633 | CastContextHint CCH, |
2634 | TTI::TargetCostKind CostKind, |
2635 | const Instruction *I) override { |
2636 | return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); |
2637 | } |
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
2641 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); |
2642 | } |
2643 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
2644 | const Instruction *I = nullptr) override { |
2645 | return Impl.getCFInstrCost(Opcode, CostKind, I); |
2646 | } |
2647 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
2648 | CmpInst::Predicate VecPred, |
2649 | TTI::TargetCostKind CostKind, |
2650 | const Instruction *I) override { |
2651 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
2652 | } |
2653 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
2654 | TTI::TargetCostKind CostKind, |
2655 | unsigned Index, Value *Op0, |
2656 | Value *Op1) override { |
2657 | return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1); |
2658 | } |
2659 | InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, |
2660 | TTI::TargetCostKind CostKind, |
2661 | unsigned Index) override { |
2662 | return Impl.getVectorInstrCost(I, Val, CostKind, Index); |
2663 | } |
2664 | InstructionCost |
2665 | getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, |
2666 | const APInt &DemandedDstElts, |
2667 | TTI::TargetCostKind CostKind) override { |
2668 | return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF, |
2669 | DemandedDstElts, CostKind); |
2670 | } |
2671 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2672 | unsigned AddressSpace, |
2673 | TTI::TargetCostKind CostKind, |
2674 | OperandValueInfo OpInfo, |
2675 | const Instruction *I) override { |
2676 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind, |
2677 | OpInfo, I); |
2678 | } |
2679 | InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2680 | unsigned AddressSpace, |
2681 | TTI::TargetCostKind CostKind, |
2682 | const Instruction *I) override { |
2683 | return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2684 | CostKind, I); |
2685 | } |
2686 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
2687 | Align Alignment, unsigned AddressSpace, |
2688 | TTI::TargetCostKind CostKind) override { |
2689 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2690 | CostKind); |
2691 | } |
2692 | InstructionCost |
2693 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2694 | bool VariableMask, Align Alignment, |
2695 | TTI::TargetCostKind CostKind, |
2696 | const Instruction *I = nullptr) override { |
2697 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
2698 | Alignment, CostKind, I); |
2699 | } |
2700 | InstructionCost |
2701 | getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2702 | bool VariableMask, Align Alignment, |
2703 | TTI::TargetCostKind CostKind, |
2704 | const Instruction *I = nullptr) override { |
2705 | return Impl.getStridedMemoryOpCost(Opcode, DataTy, Ptr, VariableMask, |
2706 | Alignment, CostKind, I); |
2707 | } |
2708 | InstructionCost getInterleavedMemoryOpCost( |
2709 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
2710 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
2711 | bool UseMaskForCond, bool UseMaskForGaps) override { |
2712 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
2713 | Alignment, AddressSpace, CostKind, |
2714 | UseMaskForCond, UseMaskForGaps); |
2715 | } |
2716 | InstructionCost |
2717 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
2718 | std::optional<FastMathFlags> FMF, |
2719 | TTI::TargetCostKind CostKind) override { |
2720 | return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); |
2721 | } |
2722 | InstructionCost |
2723 | getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty, FastMathFlags FMF, |
2724 | TTI::TargetCostKind CostKind) override { |
2725 | return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind); |
2726 | } |
2727 | InstructionCost |
2728 | getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, |
2729 | VectorType *Ty, FastMathFlags FMF, |
2730 | TTI::TargetCostKind CostKind) override { |
2731 | return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF, |
2732 | CostKind); |
2733 | } |
2734 | InstructionCost |
2735 | getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, |
2736 | TTI::TargetCostKind CostKind) override { |
2737 | return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind); |
2738 | } |
2739 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2740 | TTI::TargetCostKind CostKind) override { |
2741 | return Impl.getIntrinsicInstrCost(ICA, CostKind); |
2742 | } |
2743 | InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
2744 | ArrayRef<Type *> Tys, |
2745 | TTI::TargetCostKind CostKind) override { |
2746 | return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); |
2747 | } |
2748 | unsigned getNumberOfParts(Type *Tp) override { |
2749 | return Impl.getNumberOfParts(Tp); |
2750 | } |
2751 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
2752 | const SCEV *Ptr) override { |
2753 | return Impl.getAddressComputationCost(Ty, SE, Ptr); |
2754 | } |
2755 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { |
2756 | return Impl.getCostOfKeepingLiveOverCall(Tys); |
2757 | } |
2758 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
2759 | MemIntrinsicInfo &Info) override { |
2760 | return Impl.getTgtMemIntrinsic(Inst, Info); |
2761 | } |
2762 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { |
2763 | return Impl.getAtomicMemIntrinsicMaxElementSize(); |
2764 | } |
2765 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
2766 | Type *ExpectedType) override { |
2767 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); |
2768 | } |
2769 | Type *getMemcpyLoopLoweringType( |
2770 | LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, |
2771 | unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, |
2772 | std::optional<uint32_t> AtomicElementSize) const override { |
2773 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, |
2774 | DestAddrSpace, SrcAlign, DestAlign, |
2775 | AtomicElementSize); |
2776 | } |
2777 | void getMemcpyLoopResidualLoweringType( |
2778 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
2779 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2780 | unsigned SrcAlign, unsigned DestAlign, |
2781 | std::optional<uint32_t> AtomicCpySize) const override { |
2782 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, |
2783 | SrcAddrSpace, DestAddrSpace, |
2784 | SrcAlign, DestAlign, AtomicCpySize); |
2785 | } |
2786 | bool areInlineCompatible(const Function *Caller, |
2787 | const Function *Callee) const override { |
2788 | return Impl.areInlineCompatible(Caller, Callee); |
2789 | } |
2790 | unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, |
2791 | unsigned DefaultCallPenalty) const override { |
2792 | return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty); |
2793 | } |
2794 | bool areTypesABICompatible(const Function *Caller, const Function *Callee, |
2795 | const ArrayRef<Type *> &Types) const override { |
2796 | return Impl.areTypesABICompatible(Caller, Callee, Types); |
2797 | } |
2798 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { |
2799 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); |
2800 | } |
2801 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { |
2802 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); |
2803 | } |
2804 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { |
2805 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); |
2806 | } |
2807 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { |
2808 | return Impl.isLegalToVectorizeLoad(LI); |
2809 | } |
2810 | bool isLegalToVectorizeStore(StoreInst *SI) const override { |
2811 | return Impl.isLegalToVectorizeStore(SI); |
2812 | } |
2813 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
2814 | unsigned AddrSpace) const override { |
2815 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, |
2816 | AddrSpace); |
2817 | } |
2818 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
2819 | unsigned AddrSpace) const override { |
2820 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, |
2821 | AddrSpace); |
2822 | } |
2823 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
2824 | ElementCount VF) const override { |
2825 | return Impl.isLegalToVectorizeReduction(RdxDesc, VF); |
2826 | } |
2827 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { |
2828 | return Impl.isElementTypeLegalForScalableVector(Ty); |
2829 | } |
2830 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
2831 | unsigned ChainSizeInBytes, |
2832 | VectorType *VecTy) const override { |
2833 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); |
2834 | } |
2835 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
2836 | unsigned ChainSizeInBytes, |
2837 | VectorType *VecTy) const override { |
2838 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); |
2839 | } |
2840 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
2841 | ReductionFlags Flags) const override { |
2842 | return Impl.preferInLoopReduction(Opcode, Ty, Flags); |
2843 | } |
2844 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
2845 | ReductionFlags Flags) const override { |
2846 | return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); |
2847 | } |
2848 | bool preferEpilogueVectorization() const override { |
2849 | return Impl.preferEpilogueVectorization(); |
2850 | } |
2851 | |
2852 | bool shouldExpandReduction(const IntrinsicInst *II) const override { |
2853 | return Impl.shouldExpandReduction(II); |
2854 | } |
2855 | |
2856 | unsigned getGISelRematGlobalCost() const override { |
2857 | return Impl.getGISelRematGlobalCost(); |
2858 | } |
2859 | |
2860 | unsigned getMinTripCountTailFoldingThreshold() const override { |
2861 | return Impl.getMinTripCountTailFoldingThreshold(); |
2862 | } |
2863 | |
2864 | bool supportsScalableVectors() const override { |
2865 | return Impl.supportsScalableVectors(); |
2866 | } |
2867 | |
2868 | bool enableScalableVectorization() const override { |
2869 | return Impl.enableScalableVectorization(); |
2870 | } |
2871 | |
2872 | bool hasActiveVectorLength(unsigned Opcode, Type *DataType, |
2873 | Align Alignment) const override { |
2874 | return Impl.hasActiveVectorLength(Opcode, DataType, Alignment); |
2875 | } |
2876 | |
2877 | VPLegalization |
2878 | getVPLegalizationStrategy(const VPIntrinsic &PI) const override { |
2879 | return Impl.getVPLegalizationStrategy(PI); |
2880 | } |
2881 | |
2882 | bool hasArmWideBranch(bool Thumb) const override { |
2883 | return Impl.hasArmWideBranch(Thumb); |
2884 | } |
2885 | |
2886 | unsigned getMaxNumArgs() const override { |
2887 | return Impl.getMaxNumArgs(); |
2888 | } |
2889 | }; |
2890 | |
2891 | template <typename T> |
2892 | TargetTransformInfo::TargetTransformInfo(T Impl) |
2893 | : TTIImpl(new Model<T>(Impl)) {} |
2894 | |
2895 | /// Analysis pass providing the \c TargetTransformInfo. |
2896 | /// |
2897 | /// The core idea of the TargetIRAnalysis is to expose an interface through |
2898 | /// which LLVM targets can analyze and provide information about the middle |
2899 | /// end's target-independent IR. This supports use cases such as target-aware |
2900 | /// cost modeling of IR constructs. |
2901 | /// |
2902 | /// This is a function analysis because much of the cost modeling for targets |
2903 | /// is done in a subtarget specific way and LLVM supports compiling different |
2904 | /// functions targeting different subtargets in order to support runtime |
2905 | /// dispatch according to the observed subtarget. |
2906 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { |
2907 | public: |
2908 | typedef TargetTransformInfo Result; |
2909 | |
2910 | /// Default construct a target IR analysis. |
2911 | /// |
2912 | /// This will use the module's datalayout to construct a baseline |
2913 | /// conservative TTI result. |
2914 | TargetIRAnalysis(); |
2915 | |
  /// Construct an IR analysis pass around a target-provided callback.
2917 | /// |
2918 | /// The callback will be called with a particular function for which the TTI |
2919 | /// is needed and must return a TTI object for that function. |
2920 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); |
2921 | |
2922 | // Value semantics. We spell out the constructors for MSVC. |
2923 | TargetIRAnalysis(const TargetIRAnalysis &Arg) |
2924 | : TTICallback(Arg.TTICallback) {} |
2925 | TargetIRAnalysis(TargetIRAnalysis &&Arg) |
2926 | : TTICallback(std::move(Arg.TTICallback)) {} |
2927 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { |
2928 | TTICallback = RHS.TTICallback; |
2929 | return *this; |
2930 | } |
2931 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { |
2932 | TTICallback = std::move(RHS.TTICallback); |
2933 | return *this; |
2934 | } |
2935 | |
2936 | Result run(const Function &F, FunctionAnalysisManager &); |
2937 | |
2938 | private: |
2939 | friend AnalysisInfoMixin<TargetIRAnalysis>; |
2940 | static AnalysisKey Key; |
2941 | |
2942 | /// The callback used to produce a result. |
2943 | /// |
2944 | /// We use a completely opaque callback so that targets can provide whatever |
2945 | /// mechanism they desire for constructing the TTI for a given function. |
2946 | /// |
2947 | /// FIXME: Should we really use std::function? It's relatively inefficient. |
2948 | /// It might be possible to arrange for even stateful callbacks to outlive |
2949 | /// the analysis and thus use a function_ref which would be lighter weight. |
2950 | /// This may also be less error prone as the callback is likely to reference |
2951 | /// the external TargetMachine, and that reference needs to never dangle. |
2952 | std::function<Result(const Function &)> TTICallback; |
2953 | |
2954 | /// Helper function used as the callback in the default constructor. |
2955 | static Result getDefaultTTI(const Function &F); |
2956 | }; |
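
// Illustrative sketch (not part of the interface above): with the new pass
// manager, a client typically registers TargetIRAnalysis with a
// FunctionAnalysisManager and then queries the resulting TargetTransformInfo
// from within a pass. Assumes a Function &F and an otherwise configured
// analysis manager.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([] { return TargetIRAnalysis(); });
//   // ...
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
//   if (TTI.supportsScalableVectors()) {
//     // e.g. consider scalable vectorization factors for this function.
//   }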
2957 | |
2958 | /// Wrapper pass for TargetTransformInfo. |
2959 | /// |
2960 | /// This pass can be constructed from a TTI object which it stores internally |
2961 | /// and is queried by passes. |
2962 | class TargetTransformInfoWrapperPass : public ImmutablePass { |
2963 | TargetIRAnalysis TIRA; |
2964 | std::optional<TargetTransformInfo> TTI; |
2965 | |
2966 | virtual void anchor(); |
2967 | |
2968 | public: |
2969 | static char ID; |
2970 | |
2971 | /// We must provide a default constructor for the pass but it should |
2972 | /// never be used. |
2973 | /// |
2974 | /// Use the constructor below or call one of the creation routines. |
2975 | TargetTransformInfoWrapperPass(); |
2976 | |
2977 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2978 | |
2979 | TargetTransformInfo &getTTI(const Function &F); |
2980 | }; |
2981 | |
2982 | /// Create an analysis pass wrapper around a TTI object. |
2983 | /// |
2984 | /// This analysis pass just holds the TTI instance and makes it available to |
2985 | /// clients. |
2986 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
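
// Illustrative sketch (not part of the declarations above): with the legacy
// pass manager, the wrapper pass is typically created from a TargetMachine and
// queried per function. Assumes a valid TargetMachine *TM, a Function &F, and
// a pass that requires TargetTransformInfoWrapperPass in getAnalysisUsage().
//
//   legacy::PassManager PM;
//   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
//   // ...
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);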
2987 | |
2988 | } // namespace llvm |
2989 | |
2990 | #endif |
2991 | |