1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
///
///  1. Which ValueTypes are natively supported by the target.
///  2. Which operations are supported for supported ValueTypes.
///  3. Cost thresholds for alternative implementations of certain operations.
16 | /// |
17 | /// In addition it has a few other components, like information about FP |
18 | /// immediates. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
23 | #define LLVM_CODEGEN_TARGETLOWERING_H |
24 | |
25 | #include "llvm/ADT/APInt.h" |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/SmallVector.h" |
29 | #include "llvm/ADT/StringRef.h" |
30 | #include "llvm/CodeGen/DAGCombine.h" |
31 | #include "llvm/CodeGen/ISDOpcodes.h" |
32 | #include "llvm/CodeGen/LowLevelTypeUtils.h" |
33 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
34 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
35 | #include "llvm/CodeGen/SelectionDAG.h" |
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | #include "llvm/CodeGen/TargetCallingConv.h" |
38 | #include "llvm/CodeGen/ValueTypes.h" |
39 | #include "llvm/CodeGenTypes/MachineValueType.h" |
40 | #include "llvm/IR/Attributes.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/DataLayout.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/InlineAsm.h" |
46 | #include "llvm/IR/Instruction.h" |
47 | #include "llvm/IR/Instructions.h" |
48 | #include "llvm/IR/Type.h" |
49 | #include "llvm/Support/Alignment.h" |
50 | #include "llvm/Support/AtomicOrdering.h" |
51 | #include "llvm/Support/Casting.h" |
52 | #include "llvm/Support/ErrorHandling.h" |
53 | #include <algorithm> |
54 | #include <cassert> |
55 | #include <climits> |
56 | #include <cstdint> |
57 | #include <iterator> |
58 | #include <map> |
59 | #include <string> |
60 | #include <utility> |
61 | #include <vector> |
62 | |
63 | namespace llvm { |
64 | |
65 | class AssumptionCache; |
66 | class CCState; |
67 | class CCValAssign; |
68 | enum class ComplexDeinterleavingOperation; |
69 | enum class ComplexDeinterleavingRotation; |
70 | class Constant; |
71 | class FastISel; |
72 | class FunctionLoweringInfo; |
73 | class GlobalValue; |
74 | class Loop; |
75 | class GISelKnownBits; |
76 | class IntrinsicInst; |
77 | class IRBuilderBase; |
78 | struct KnownBits; |
79 | class LLVMContext; |
80 | class MachineBasicBlock; |
81 | class MachineFunction; |
82 | class MachineInstr; |
83 | class MachineJumpTableInfo; |
84 | class MachineLoop; |
85 | class MachineRegisterInfo; |
86 | class MCContext; |
87 | class MCExpr; |
88 | class Module; |
89 | class ProfileSummaryInfo; |
90 | class TargetLibraryInfo; |
91 | class TargetMachine; |
92 | class TargetRegisterClass; |
93 | class TargetRegisterInfo; |
94 | class TargetTransformInfo; |
95 | class Value; |
96 | |
namespace Sched {

/// Instruction scheduling preferences a target may request from the
/// SelectionDAG scheduler; each value selects a different list-scheduling
/// strategy (trade-offs between latency, register pressure and compile time).
enum Preference {
  None,        // No preference
  Source,      // Follow source order.
  RegPressure, // Scheduling for lowest register pressure.
  Hybrid,      // Scheduling for both latency and register pressure.
  ILP,         // Scheduling for ILP in low register pressure mode.
  VLIW,        // Scheduling for VLIW targets.
  Fast,        // Fast suboptimal list scheduling
  Linearize    // Linearize DAG, no scheduling
};

} // end namespace Sched
111 | |
112 | // MemOp models a memory operation, either memset or memcpy/memmove. |
113 | struct MemOp { |
114 | private: |
115 | // Shared |
116 | uint64_t Size; |
117 | bool DstAlignCanChange; // true if destination alignment can satisfy any |
118 | // constraint. |
119 | Align DstAlign; // Specified alignment of the memory operation. |
120 | |
121 | bool AllowOverlap; |
122 | // memset only |
123 | bool IsMemset; // If setthis memory operation is a memset. |
124 | bool ZeroMemset; // If set clears out memory with zeros. |
125 | // memcpy only |
126 | bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register |
127 | // constant so it does not need to be loaded. |
128 | Align SrcAlign; // Inferred alignment of the source or default value if the |
129 | // memory operation does not need to load the value. |
130 | public: |
131 | static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
132 | Align SrcAlign, bool IsVolatile, |
133 | bool MemcpyStrSrc = false) { |
134 | MemOp Op; |
135 | Op.Size = Size; |
136 | Op.DstAlignCanChange = DstAlignCanChange; |
137 | Op.DstAlign = DstAlign; |
138 | Op.AllowOverlap = !IsVolatile; |
139 | Op.IsMemset = false; |
140 | Op.ZeroMemset = false; |
141 | Op.MemcpyStrSrc = MemcpyStrSrc; |
142 | Op.SrcAlign = SrcAlign; |
143 | return Op; |
144 | } |
145 | |
146 | static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, |
147 | bool IsZeroMemset, bool IsVolatile) { |
148 | MemOp Op; |
149 | Op.Size = Size; |
150 | Op.DstAlignCanChange = DstAlignCanChange; |
151 | Op.DstAlign = DstAlign; |
152 | Op.AllowOverlap = !IsVolatile; |
153 | Op.IsMemset = true; |
154 | Op.ZeroMemset = IsZeroMemset; |
155 | Op.MemcpyStrSrc = false; |
156 | return Op; |
157 | } |
158 | |
159 | uint64_t size() const { return Size; } |
160 | Align getDstAlign() const { |
161 | assert(!DstAlignCanChange); |
162 | return DstAlign; |
163 | } |
164 | bool isFixedDstAlign() const { return !DstAlignCanChange; } |
165 | bool allowOverlap() const { return AllowOverlap; } |
166 | bool isMemset() const { return IsMemset; } |
167 | bool isMemcpy() const { return !IsMemset; } |
168 | bool isMemcpyWithFixedDstAlign() const { |
169 | return isMemcpy() && !DstAlignCanChange; |
170 | } |
171 | bool isZeroMemset() const { return isMemset() && ZeroMemset; } |
172 | bool isMemcpyStrSrc() const { |
173 | assert(isMemcpy() && "Must be a memcpy" ); |
174 | return MemcpyStrSrc; |
175 | } |
176 | Align getSrcAlign() const { |
177 | assert(isMemcpy() && "Must be a memcpy" ); |
178 | return SrcAlign; |
179 | } |
180 | bool isSrcAligned(Align AlignCheck) const { |
181 | return isMemset() || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: SrcAlign.value()); |
182 | } |
183 | bool isDstAligned(Align AlignCheck) const { |
184 | return DstAlignCanChange || llvm::isAligned(Lhs: AlignCheck, SizeInBytes: DstAlign.value()); |
185 | } |
186 | bool isAligned(Align AlignCheck) const { |
187 | return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); |
188 | } |
189 | }; |
190 | |
191 | /// This base class for TargetLowering contains the SelectionDAG-independent |
192 | /// parts that can be used from the rest of CodeGen. |
193 | class TargetLoweringBase { |
194 | public: |
  /// This enum indicates whether operations are valid for a target, and if not,
  /// what action should be used to make them valid.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };
204 | |
  /// This enum indicates whether a type is legal for a target, and if not,
  /// what action should be used to make it valid.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat,    // Replace this float with a larger one.
    TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
    TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
                                 // While it is theoretically possible to
                                 // legalize operations on scalable types with a
                                 // loop that handles the vscale * #lanes of the
                                 // vector, this is non-trivial at SelectionDAG
                                 // level and these types are better to be
                                 // widened or promoted.
  };
226 | |
227 | /// LegalizeKind holds the legalization kind that needs to happen to EVT |
228 | /// in order to type-legalize it. |
229 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
230 | |
  /// Enum that describes how the target represents true/false values: which
  /// bits of a boolean-producing node (e.g. SETCC) are meaningful.
  enum BooleanContent {
    UndefinedBooleanContent,        // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,        // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };
237 | |
  /// Enum that describes what type of support for selects the target has.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };
246 | |
  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,          // Don't expand the instruction.
    CastToInteger, // Cast the atomic instruction to another type, e.g. from
                   // floating-point to integer type.
    LLSC,          // Expand the instruction into loadlinked/storeconditional; used
                   // by ARM/AArch64.
    LLOnly,        // Expand the (load) instruction into just a load-linked, which has
                   // greater atomic guarantees than a normal load.
    CmpXChg,       // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic,   // Use a target-specific intrinsic for the LL/SC loop.
    BitTestIntrinsic,  // Use a target-specific intrinsic for special bit
                       // operations; used by X86.
    CmpArithIntrinsic, // Use a target-specific intrinsic for special compare
                       // operations; used by X86.
    Expand,        // Generic expansion in terms of other atomic operations.

    // Rewrite to a non-atomic form for use in a known non-preemptible
    // environment.
    NotAtomic
  };
271 | |
  /// Enum that specifies when a multiplication should be expanded.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };
278 | |
  /// Enum that specifies when a float negation is beneficial, ordered from
  /// most to least profitable.
  enum class NegatibleCost {
    Cheaper = 0,  // Negated expression is cheaper.
    Neutral = 1,  // Negated expression has the same cost.
    Expensive = 2 // Negated expression is more expensive.
  };
285 | |
  /// Enum of different potentially desirable ways to fold (and/or (setcc ...),
  /// (setcc ...)). Values are bit flags so they can be combined.
  enum AndOrSETCCFoldKind : uint8_t {
    None = 0,   // No fold is preferable.
    AddAnd = 1, // Fold with `Add` op and `And` op is preferable.
    NotAnd = 2, // Fold with `Not` op and `And` op is preferable.
    ABS = 4,    // Fold with `llvm.abs` op is preferable.
  };
294 | |
  /// Describes one argument of a call being lowered: the IR value together
  /// with the flags that affect how it is passed (filled in from the call
  /// site by setAttributes).
  class ArgListEntry {
  public:
    Value *Val = nullptr;     // Original IR argument value.
    SDValue Node = SDValue(); // Lowered SelectionDAG value, if any.
    Type *Ty = nullptr;       // IR type of the argument.
    // One bit per ABI-affecting parameter attribute.
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsByRef : 1;
    bool IsInAlloca : 1;
    bool IsPreallocated : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftAsync : 1;
    bool IsSwiftError : 1;
    bool IsCFGuardTarget : 1;
    MaybeAlign Alignment = std::nullopt; // Explicit alignment, if present.
    Type *IndirectType = nullptr; // Pointee type for indirect passing
                                  // (byval/byref/etc.) -- TODO confirm.

    // Bit-fields cannot use default member initializers pre-C++20, so all
    // flags are cleared here; the other members use their initializers above.
    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
          IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
          IsSwiftAsync(false), IsSwiftError(false), IsCFGuardTarget(false) {}

    /// Populate the flags from argument \p ArgIdx of \p Call.
    void setAttributes(const CallBase *Call, unsigned ArgIdx);
  };
325 | using ArgListTy = std::vector<ArgListEntry>; |
326 | |
327 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
328 | ArgListTy &Args) const {}; |
329 | |
330 | static ISD::NodeType getExtendForContent(BooleanContent Content) { |
331 | switch (Content) { |
332 | case UndefinedBooleanContent: |
333 | // Extend by adding rubbish bits. |
334 | return ISD::ANY_EXTEND; |
335 | case ZeroOrOneBooleanContent: |
336 | // Extend by adding zero bits. |
337 | return ISD::ZERO_EXTEND; |
338 | case ZeroOrNegativeOneBooleanContent: |
339 | // Extend by copying the sign bit. |
340 | return ISD::SIGN_EXTEND; |
341 | } |
342 | llvm_unreachable("Invalid content kind" ); |
343 | } |
344 | |
345 | explicit TargetLoweringBase(const TargetMachine &TM); |
346 | TargetLoweringBase(const TargetLoweringBase &) = delete; |
347 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
348 | virtual ~TargetLoweringBase() = default; |
349 | |
350 | /// Return true if the target support strict float operation |
351 | bool isStrictFPEnabled() const { |
352 | return IsStrictFPEnabled; |
353 | } |
354 | |
355 | protected: |
356 | /// Initialize all of the actions to default values. |
357 | void initActions(); |
358 | |
359 | public: |
360 | const TargetMachine &getTargetMachine() const { return TM; } |
361 | |
362 | virtual bool useSoftFloat() const { return false; } |
363 | |
364 | /// Return the pointer type for the given address space, defaults to |
365 | /// the pointer type from the data layout. |
366 | /// FIXME: The default needs to be removed once all the code is updated. |
367 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
368 | return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS)); |
369 | } |
370 | |
371 | /// Return the in-memory pointer type for the given address space, defaults to |
372 | /// the pointer type from the data layout. |
373 | /// FIXME: The default needs to be removed once all the code is updated. |
374 | virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { |
375 | return MVT::getIntegerVT(BitWidth: DL.getPointerSizeInBits(AS)); |
376 | } |
377 | |
378 | /// Return the type for frame index, which is determined by |
379 | /// the alloca address space specified through the data layout. |
380 | MVT getFrameIndexTy(const DataLayout &DL) const { |
381 | return getPointerTy(DL, AS: DL.getAllocaAddrSpace()); |
382 | } |
383 | |
384 | /// Return the type for code pointers, which is determined by the program |
385 | /// address space specified through the data layout. |
386 | MVT getProgramPointerTy(const DataLayout &DL) const { |
387 | return getPointerTy(DL, AS: DL.getProgramAddressSpace()); |
388 | } |
389 | |
390 | /// Return the type for operands of fence. |
391 | /// TODO: Let fence operands be of i32 type and remove this. |
392 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { |
393 | return getPointerTy(DL); |
394 | } |
395 | |
396 | /// Return the type to use for a scalar shift opcode, given the shifted amount |
397 | /// type. Targets should return a legal type if the input type is legal. |
398 | /// Targets can return a type that is too small if the input type is illegal. |
399 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
400 | |
401 | /// Returns the type for the shift amount of a shift opcode. For vectors, |
402 | /// returns the input type. For scalars, behavior depends on \p LegalTypes. If |
403 | /// \p LegalTypes is true, calls getScalarShiftAmountTy, otherwise uses |
404 | /// pointer type. If getScalarShiftAmountTy or pointer type cannot represent |
405 | /// all possible shift amounts, returns MVT::i32. In general, \p LegalTypes |
406 | /// should be set to true for calls during type legalization and after type |
407 | /// legalization has been completed. |
408 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, |
409 | bool LegalTypes = true) const; |
410 | |
411 | /// Return the preferred type to use for a shift opcode, given the shifted |
412 | /// amount type is \p ShiftValueTy. |
413 | LLVM_READONLY |
414 | virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { |
415 | return ShiftValueTy; |
416 | } |
417 | |
418 | /// Returns the type to be used for the index operand of: |
419 | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, |
420 | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR |
421 | virtual MVT getVectorIdxTy(const DataLayout &DL) const { |
422 | return getPointerTy(DL); |
423 | } |
424 | |
425 | /// Returns the type to be used for the EVL/AVL operand of VP nodes: |
426 | /// ISD::VP_ADD, ISD::VP_SUB, etc. It must be a legal scalar integer type, |
427 | /// and must be at least as large as i32. The EVL is implicitly zero-extended |
428 | /// to any larger type. |
429 | virtual MVT getVPExplicitVectorLengthTy() const { return MVT::i32; } |
430 | |
431 | /// This callback is used to inspect load/store instructions and add |
432 | /// target-specific MachineMemOperand flags to them. The default |
433 | /// implementation does nothing. |
434 | virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { |
435 | return MachineMemOperand::MONone; |
436 | } |
437 | |
438 | /// This callback is used to inspect load/store SDNode. |
439 | /// The default implementation does nothing. |
440 | virtual MachineMemOperand::Flags |
441 | getTargetMMOFlags(const MemSDNode &Node) const { |
442 | return MachineMemOperand::MONone; |
443 | } |
444 | |
445 | MachineMemOperand::Flags |
446 | getLoadMemOperandFlags(const LoadInst &LI, const DataLayout &DL, |
447 | AssumptionCache *AC = nullptr, |
448 | const TargetLibraryInfo *LibInfo = nullptr) const; |
449 | MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, |
450 | const DataLayout &DL) const; |
451 | MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, |
452 | const DataLayout &DL) const; |
453 | |
454 | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { |
455 | return true; |
456 | } |
457 | |
458 | /// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded |
459 | /// using generic code in SelectionDAGBuilder. |
460 | virtual bool shouldExpandGetActiveLaneMask(EVT VT, EVT OpVT) const { |
461 | return true; |
462 | } |
463 | |
464 | virtual bool shouldExpandGetVectorLength(EVT CountVT, unsigned VF, |
465 | bool IsScalable) const { |
466 | return true; |
467 | } |
468 | |
469 | /// Return true if the @llvm.experimental.cttz.elts intrinsic should be |
470 | /// expanded using generic code in SelectionDAGBuilder. |
471 | virtual bool shouldExpandCttzElements(EVT VT) const { return true; } |
472 | |
473 | // Return true if op(vecreduce(x), vecreduce(y)) should be reassociated to |
474 | // vecreduce(op(x, y)) for the reduction opcode RedOpc. |
475 | virtual bool shouldReassociateReduction(unsigned RedOpc, EVT VT) const { |
476 | return true; |
477 | } |
478 | |
479 | /// Return true if it is profitable to convert a select of FP constants into |
480 | /// a constant pool load whose address depends on the select condition. The |
481 | /// parameter may be used to differentiate a select with FP compare from |
482 | /// integer compare. |
483 | virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
484 | return true; |
485 | } |
486 | |
487 | /// Return true if multiple condition registers are available. |
488 | bool hasMultipleConditionRegisters() const { |
489 | return HasMultipleConditionRegisters; |
490 | } |
491 | |
492 | /// Return true if the target has BitExtract instructions. |
493 | bool () const { return HasExtractBitsInsn; } |
494 | |
  /// Return the preferred vector type legalization action.
  virtual TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const {
    // The default action for one element vectors is to scalarize.
    if (VT.getVectorElementCount().isScalar())
      return TypeScalarizeVector;
    // The default action for an odd-width vector is to widen.
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote.
    return TypePromoteInteger;
  }
507 | |
508 | // Return true if the half type should be promoted using soft promotion rules |
509 | // where each operation is promoted to f32 individually, then converted to |
510 | // fp16. The default behavior is to promote chains of operations, keeping |
511 | // intermediate results in f32 precision and range. |
512 | virtual bool softPromoteHalfType() const { return false; } |
513 | |
514 | // Return true if, for soft-promoted half, the half type should be passed |
515 | // passed to and returned from functions as f32. The default behavior is to |
516 | // pass as i16. If soft-promoted half is not used, this function is ignored |
517 | // and values are always passed and returned as f32. |
518 | virtual bool useFPRegsForHalfType() const { return false; } |
519 | |
520 | // There are two general methods for expanding a BUILD_VECTOR node: |
521 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
522 | // them together. |
523 | // 2. Build the vector on the stack and then load it. |
524 | // If this function returns true, then method (1) will be used, subject to |
525 | // the constraint that all of the necessary shuffles are legal (as determined |
526 | // by isShuffleMaskLegal). If this function returns false, then method (2) is |
527 | // always used. The vector type, and the number of defined values, are |
528 | // provided. |
529 | virtual bool |
530 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
531 | unsigned DefinedValues) const { |
532 | return DefinedValues < 3; |
533 | } |
534 | |
535 | /// Return true if integer divide is usually cheaper than a sequence of |
536 | /// several shifts, adds, and multiplies for this target. |
537 | /// The definition of "cheaper" may depend on whether we're optimizing |
538 | /// for speed or for size. |
539 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } |
540 | |
541 | /// Return true if the target can handle a standalone remainder operation. |
542 | virtual bool hasStandaloneRem(EVT VT) const { |
543 | return true; |
544 | } |
545 | |
546 | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). |
547 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { |
548 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). |
549 | return false; |
550 | } |
551 | |
552 | /// Reciprocal estimate status values used by the functions below. |
553 | enum ReciprocalEstimate : int { |
554 | Unspecified = -1, |
555 | Disabled = 0, |
556 | Enabled = 1 |
557 | }; |
558 | |
559 | /// Return a ReciprocalEstimate enum value for a square root of the given type |
560 | /// based on the function's attributes. If the operation is not overridden by |
561 | /// the function's attributes, "Unspecified" is returned and target defaults |
562 | /// are expected to be used for instruction selection. |
563 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
564 | |
565 | /// Return a ReciprocalEstimate enum value for a division of the given type |
566 | /// based on the function's attributes. If the operation is not overridden by |
567 | /// the function's attributes, "Unspecified" is returned and target defaults |
568 | /// are expected to be used for instruction selection. |
569 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
570 | |
571 | /// Return the refinement step count for a square root of the given type based |
572 | /// on the function's attributes. If the operation is not overridden by |
573 | /// the function's attributes, "Unspecified" is returned and target defaults |
574 | /// are expected to be used for instruction selection. |
575 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
576 | |
577 | /// Return the refinement step count for a division of the given type based |
578 | /// on the function's attributes. If the operation is not overridden by |
579 | /// the function's attributes, "Unspecified" is returned and target defaults |
580 | /// are expected to be used for instruction selection. |
581 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
582 | |
583 | /// Returns true if target has indicated at least one type should be bypassed. |
584 | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } |
585 | |
586 | /// Returns map of slow types for division or remainder with corresponding |
587 | /// fast types |
588 | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { |
589 | return BypassSlowDivWidths; |
590 | } |
591 | |
592 | /// Return true only if vscale must be a power of two. |
593 | virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; } |
594 | |
595 | /// Return true if Flow Control is an expensive operation that should be |
596 | /// avoided. |
597 | bool isJumpExpensive() const { return JumpIsExpensive; } |
598 | |
599 | /// Return true if selects are only cheaper than branches if the branch is |
600 | /// unlikely to be predicted right. |
601 | bool isPredictableSelectExpensive() const { |
602 | return PredictableSelectIsExpensive; |
603 | } |
604 | |
605 | virtual bool fallBackToDAGISel(const Instruction &Inst) const { |
606 | return false; |
607 | } |
608 | |
609 | /// Return true if the following transform is beneficial: |
610 | /// fold (conv (load x)) -> (load (conv*)x) |
611 | /// On architectures that don't natively support some vector loads |
612 | /// efficiently, casting the load to a smaller vector of larger types and |
613 | /// loading is more efficient, however, this can be undone by optimizations in |
614 | /// dag combiner. |
615 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
616 | const SelectionDAG &DAG, |
617 | const MachineMemOperand &MMO) const; |
618 | |
619 | /// Return true if the following transform is beneficial: |
620 | /// (store (y (conv x)), y*)) -> (store x, (x*)) |
621 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, |
622 | const SelectionDAG &DAG, |
623 | const MachineMemOperand &MMO) const { |
624 | // Default to the same logic as loads. |
625 | return isLoadBitCastBeneficial(LoadVT: StoreVT, BitcastVT, DAG, MMO); |
626 | } |
627 | |
628 | /// Return true if it is expected to be cheaper to do a store of vector |
629 | /// constant with the given size and type for the address space than to |
630 | /// store the individual scalar element constants. |
631 | virtual bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, |
632 | unsigned NumElem, |
633 | unsigned AddrSpace) const { |
634 | return IsZero; |
635 | } |
636 | |
637 | /// Allow store merging for the specified type after legalization in addition |
638 | /// to before legalization. This may transform stores that do not exist |
639 | /// earlier (for example, stores created from intrinsics). |
640 | virtual bool mergeStoresAfterLegalization(EVT MemVT) const { |
641 | return true; |
642 | } |
643 | |
644 | /// Returns if it's reasonable to merge stores to MemVT size. |
645 | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, |
646 | const MachineFunction &MF) const { |
647 | return true; |
648 | } |
649 | |
650 | /// Return true if it is cheap to speculate a call to intrinsic cttz. |
651 | virtual bool isCheapToSpeculateCttz(Type *Ty) const { |
652 | return false; |
653 | } |
654 | |
655 | /// Return true if it is cheap to speculate a call to intrinsic ctlz. |
656 | virtual bool isCheapToSpeculateCtlz(Type *Ty) const { |
657 | return false; |
658 | } |
659 | |
660 | /// Return true if ctlz instruction is fast. |
661 | virtual bool isCtlzFast() const { |
662 | return false; |
663 | } |
664 | |
665 | /// Return true if ctpop instruction is fast. |
666 | virtual bool isCtpopFast(EVT VT) const { |
667 | return isOperationLegal(Op: ISD::CTPOP, VT); |
668 | } |
669 | |
670 | /// Return the maximum number of "x & (x - 1)" operations that can be done |
671 | /// instead of deferring to a custom CTPOP. |
672 | virtual unsigned getCustomCtpopCost(EVT VT, ISD::CondCode Cond) const { |
673 | return 1; |
674 | } |
675 | |
676 | /// Return true if instruction generated for equality comparison is folded |
677 | /// with instruction generated for signed comparison. |
678 | virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } |
679 | |
680 | /// Return true if the heuristic to prefer icmp eq zero should be used in code |
681 | /// gen prepare. |
682 | virtual bool preferZeroCompareBranch() const { return false; } |
683 | |
684 | /// Return true if it is cheaper to split the store of a merged int val |
685 | /// from a pair of smaller values into multiple stores. |
686 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { |
687 | return false; |
688 | } |
689 | |
690 | /// Return if the target supports combining a |
691 | /// chain like: |
692 | /// \code |
693 | /// %andResult = and %val1, #mask |
694 | /// %icmpResult = icmp %andResult, 0 |
695 | /// \endcode |
696 | /// into a single machine instruction of a form like: |
697 | /// \code |
698 | /// cc = test %register, #mask |
699 | /// \endcode |
700 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { |
701 | return false; |
702 | } |
703 | |
704 | /// Return true if it is valid to merge the TargetMMOFlags in two SDNodes. |
705 | virtual bool |
706 | areTwoSDNodeTargetMMOFlagsMergeable(const MemSDNode &NodeX, |
707 | const MemSDNode &NodeY) const { |
708 | return true; |
709 | } |
710 | |
711 | /// Use bitwise logic to make pairs of compares more efficient. For example: |
712 | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 |
713 | /// This should be true when it takes more than one instruction to lower |
714 | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on |
715 | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. |
716 | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { |
717 | return false; |
718 | } |
719 | |
720 | /// Return the preferred operand type if the target has a quick way to compare |
721 | /// integer values of the given size. Assume that any legal integer type can |
722 | /// be compared efficiently. Targets may override this to allow illegal wide |
723 | /// types to return a vector type if there is support to compare that type. |
724 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
725 | MVT VT = MVT::getIntegerVT(BitWidth: NumBits); |
726 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
727 | } |
728 | |
729 | /// Return true if the target should transform: |
730 | /// (X & Y) == Y ---> (~X & Y) == 0 |
731 | /// (X & Y) != Y ---> (~X & Y) != 0 |
732 | /// |
733 | /// This may be profitable if the target has a bitwise and-not operation that |
734 | /// sets comparison flags. A target may want to limit the transformation based |
735 | /// on the type of Y or if Y is a constant. |
736 | /// |
737 | /// Note that the transform will not occur if Y is known to be a power-of-2 |
738 | /// because a mask and compare of a single bit can be handled by inverting the |
739 | /// predicate, for example: |
740 | /// (X & 8) == 8 ---> (X & 8) != 0 |
741 | virtual bool hasAndNotCompare(SDValue Y) const { |
742 | return false; |
743 | } |
744 | |
745 | /// Return true if the target has a bitwise and-not operation: |
746 | /// X = ~A & B |
747 | /// This can be used to simplify select or other instructions. |
748 | virtual bool hasAndNot(SDValue X) const { |
749 | // If the target has the more complex version of this operation, assume that |
750 | // it has this operation too. |
751 | return hasAndNotCompare(Y: X); |
752 | } |
753 | |
754 | /// Return true if the target has a bit-test instruction: |
755 | /// (X & (1 << Y)) ==/!= 0 |
756 | /// This knowledge can be used to prevent breaking the pattern, |
757 | /// or creating it if it could be recognized. |
758 | virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } |
759 | |
760 | /// There are two ways to clear extreme bits (either low or high): |
761 | /// Mask: x & (-1 << y) (the instcombine canonical form) |
762 | /// Shifts: x >> y << y |
763 | /// Return true if the variant with 2 variable shifts is preferred. |
764 | /// Return false if there is no preference. |
765 | virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { |
766 | // By default, let's assume that no one prefers shifts. |
767 | return false; |
768 | } |
769 | |
770 | /// Return true if it is profitable to fold a pair of shifts into a mask. |
771 | /// This is usually true on most targets. But some targets, like Thumb1, |
772 | /// have immediate shift instructions, but no immediate "and" instruction; |
773 | /// this makes the fold unprofitable. |
774 | virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, |
775 | CombineLevel Level) const { |
776 | return true; |
777 | } |
778 | |
/// Should we transform the IR-optimal check for whether given truncation
/// down into KeptBits would be truncating or not:
/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// Into its more traditional form:
/// ((%x << C) a>> C) dstcond %x
/// Return true if we should transform.
/// Return false if there is no preference.
virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
                                                  unsigned KeptBits) const {
  // By default, let's assume that no one prefers shifts.
  return false;
}
791 | |
/// Given the pattern
/// (X & (C l>>/<< Y)) ==/!= 0
/// return true if it should be transformed into:
/// ((X <</l>> Y) & C) ==/!= 0
/// WARNING: if 'X' is a constant, the fold may deadlock!
/// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
/// here because it can end up being not linked in.
/// \p XC / \p CC are the constant values of X / C when they are constants,
/// null otherwise; \p OldShiftOpcode / \p NewShiftOpcode are the shift
/// opcodes before and after the proposed hoist.
virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
    SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  if (hasBitTest(X, Y)) {
    // One interesting pattern that we'd want to form is 'bit test':
    //   ((1 << Y) & C) ==/!= 0
    // But we also need to be careful not to try to reverse that fold.

    // Is this '1 << Y' ?
    if (OldShiftOpcode == ISD::SHL && CC->isOne())
      return false; // Keep the 'bit test' pattern.

    // Will it be '1 << Y' after the transform ?
    if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
      return true; // Do form the 'bit test' pattern.
  }

  // If 'X' is a constant, and we transform, then we will immediately
  // try to undo the fold, thus causing endless combine loop.
  // So by default, let's assume everyone prefers the fold
  // iff 'X' is not a constant.
  return !XC;
}
823 | |
// Return true if it's desirable to perform the following transform:
//   (fmul C, (uitofp Pow2))
//       -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa))
//   (fdiv C, (uitofp Pow2))
//       -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa))
//
// This is only queried after we have verified the transform will be bitwise
// equals.
//
// SDNode *N      : The FDiv/FMul node we want to transform.
// SDValue FPConst: The Float constant operand in `N`.
// SDValue IntPow2: The Integer power of 2 operand in `N`.
virtual bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                                 SDValue IntPow2) const {
  // Default to avoiding fdiv which is often very expensive.
  return N->getOpcode() == ISD::FDIV;
}
841 | |
// Given:
//   (icmp eq/ne (and X, C0), (shift X, C1))
// or
//   (icmp eq/ne X, (rotate X, CPow2))
//
// If C0 is a mask or shifted mask and the shift amt (C1) isolates the
// remaining bits (i.e something like `(x64 & UINT32_MAX) == (x64 >> 32)`)
// Do we prefer the shift to be shift-right, shift-left, or rotate.
// Note: It's only valid to convert the rotate version to the shift version
// iff the shift-amt (`C1`) is a power of 2 (including 0).
// If ShiftOpc (current Opcode) is returned, do nothing.
virtual unsigned preferedOpcodeForCmpEqPiecesOfOperand(
    EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
    const APInt &ShiftOrRotateAmt,
    const std::optional<APInt> &AndMask) const {
  // Default: keep whatever opcode the DAG already has.
  return ShiftOpc;
}
859 | |
860 | /// These two forms are equivalent: |
861 | /// sub %y, (xor %x, -1) |
862 | /// add (add %x, 1), %y |
863 | /// The variant with two add's is IR-canonical. |
864 | /// Some targets may prefer one to the other. |
865 | virtual bool preferIncOfAddToSubOfNot(EVT VT) const { |
866 | // By default, let's assume that everyone prefers the form with two add's. |
867 | return true; |
868 | } |
869 | |
870 | // By default prefer folding (abs (sub nsw x, y)) -> abds(x, y). Some targets |
871 | // may want to avoid this to prevent loss of sub_nsw pattern. |
872 | virtual bool preferABDSToABSWithNSW(EVT VT) const { |
873 | return true; |
874 | } |
875 | |
876 | // Return true if the target wants to transform Op(Splat(X)) -> Splat(Op(X)) |
877 | virtual bool preferScalarizeSplat(SDNode *N) const { return true; } |
878 | |
879 | // Return true if the target wants to transform: |
880 | // (TruncVT truncate(sext_in_reg(VT X, ExtVT)) |
881 | // -> (TruncVT sext_in_reg(truncate(VT X), ExtVT)) |
882 | // Some targets might prefer pre-sextinreg to improve truncation/saturation. |
883 | virtual bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT, EVT ExtVT) const { |
884 | return true; |
885 | } |
886 | |
887 | /// Return true if the target wants to use the optimization that |
888 | /// turns ext(promotableInst1(...(promotableInstN(load)))) into |
889 | /// promotedInst1(...(promotedInstN(ext(load)))). |
890 | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } |
891 | |
892 | /// Return true if the target can combine store(extractelement VectorTy, |
893 | /// Idx). |
894 | /// \p Cost[out] gives the cost of that transformation when this is true. |
895 | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, |
896 | unsigned &Cost) const { |
897 | return false; |
898 | } |
899 | |
900 | /// Return true if the target shall perform extract vector element and store |
901 | /// given that the vector is known to be splat of constant. |
902 | /// \p Index[out] gives the index of the vector element to be extracted when |
903 | /// this is true. |
904 | virtual bool ( |
905 | Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const { |
906 | return false; |
907 | } |
908 | |
909 | /// Return true if inserting a scalar into a variable element of an undef |
910 | /// vector is more efficiently handled by splatting the scalar instead. |
911 | virtual bool shouldSplatInsEltVarIndex(EVT) const { |
912 | return false; |
913 | } |
914 | |
915 | /// Return true if target always benefits from combining into FMA for a |
916 | /// given value type. This must typically return false on targets where FMA |
917 | /// takes more cycles to execute than FADD. |
918 | virtual bool enableAggressiveFMAFusion(EVT VT) const { return false; } |
919 | |
920 | /// Return true if target always benefits from combining into FMA for a |
921 | /// given value type. This must typically return false on targets where FMA |
922 | /// takes more cycles to execute than FADD. |
923 | virtual bool enableAggressiveFMAFusion(LLT Ty) const { return false; } |
924 | |
/// Return the ValueType of the result of SETCC operations.
/// (Declaration only; defined out of line.)
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                               EVT VT) const;

/// Return the ValueType for comparison libcalls. Comparison libcalls include
/// floating point comparison calls, and Ordered/Unordered check calls on
/// floating point numbers.
/// (Declaration only; defined out of line.)
virtual
MVT::SimpleValueType getCmpLibcallReturnType() const;
934 | |
935 | /// For targets without i1 registers, this gives the nature of the high-bits |
936 | /// of boolean values held in types wider than i1. |
937 | /// |
938 | /// "Boolean values" are special true/false values produced by nodes like |
939 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
940 | /// Not to be confused with general values promoted from i1. Some cpus |
941 | /// distinguish between vectors of boolean and scalars; the isVec parameter |
942 | /// selects between the two kinds. For example on X86 a scalar boolean should |
943 | /// be zero extended from i1, while the elements of a vector of booleans |
944 | /// should be sign extended from i1. |
945 | /// |
946 | /// Some cpus also treat floating point types the same way as they treat |
947 | /// vectors instead of the way they treat scalars. |
948 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
949 | if (isVec) |
950 | return BooleanVectorContents; |
951 | return isFloat ? BooleanFloatContents : BooleanContents; |
952 | } |
953 | |
954 | BooleanContent getBooleanContents(EVT Type) const { |
955 | return getBooleanContents(isVec: Type.isVector(), isFloat: Type.isFloatingPoint()); |
956 | } |
957 | |
958 | /// Promote the given target boolean to a target boolean of the given type. |
959 | /// A target boolean is an integer value, not necessarily of type i1, the bits |
960 | /// of which conform to getBooleanContents. |
961 | /// |
962 | /// ValVT is the type of values that produced the boolean. |
963 | SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, |
964 | EVT ValVT) const { |
965 | SDLoc dl(Bool); |
966 | EVT BoolVT = |
967 | getSetCCResultType(DL: DAG.getDataLayout(), Context&: *DAG.getContext(), VT: ValVT); |
968 | ISD::NodeType ExtendCode = getExtendForContent(Content: getBooleanContents(Type: ValVT)); |
969 | return DAG.getNode(Opcode: ExtendCode, DL: dl, VT: BoolVT, Operand: Bool); |
970 | } |
971 | |
972 | /// Return target scheduling preference. |
973 | Sched::Preference getSchedulingPreference() const { |
974 | return SchedPreferenceInfo; |
975 | } |
976 | |
977 | /// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics |
978 | /// for different nodes. This function returns the preference (or none) for |
979 | /// the given node. |
980 | virtual Sched::Preference getSchedulingPreference(SDNode *) const { |
981 | return Sched::None; |
982 | } |
983 | |
/// Return the register class that should be used for the specified value
/// type. \p isDivergent is ignored here; targets with divergence-aware
/// register files (e.g. GPUs) override this and use it.
virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
  (void)isDivergent;
  const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
  // A null entry means the type has no native register class on this target.
  assert(RC && "This value type is not natively supported!" );
  return RC;
}
992 | |
993 | /// Allows target to decide about the register class of the |
994 | /// specific value that is live outside the defining block. |
995 | /// Returns true if the value needs uniform register class. |
996 | virtual bool requiresUniformRegister(MachineFunction &MF, |
997 | const Value *) const { |
998 | return false; |
999 | } |
1000 | |
1001 | /// Return the 'representative' register class for the specified value |
1002 | /// type. |
1003 | /// |
1004 | /// The 'representative' register class is the largest legal super-reg |
1005 | /// register class for the register class of the value type. For example, on |
1006 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
1007 | /// register class is GR64 on x86_64. |
1008 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
1009 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
1010 | return RC; |
1011 | } |
1012 | |
1013 | /// Return the cost of the 'representative' register class for the specified |
1014 | /// value type. |
1015 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
1016 | return RepRegClassCostForVT[VT.SimpleTy]; |
1017 | } |
1018 | |
/// Return the preferred strategy to legalize this SHIFT instruction, with
/// \p ExpansionFactor being the recursion depth - how many expansion needed.
enum class ShiftLegalizationStrategy {
  ExpandToParts,      // Split the wide shift into part-sized shifts.
  ExpandThroughStack, // Spill to the stack and shift via memory.
  LowerToLibcall      // Call a runtime library routine.
};
virtual ShiftLegalizationStrategy
preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                   unsigned ExpansionFactor) const {
  // One level of expansion is cheap enough to do in parts; deeper recursion
  // would blow up code size, so go through the stack instead.
  if (ExpansionFactor == 1)
    return ShiftLegalizationStrategy::ExpandToParts;
  return ShiftLegalizationStrategy::ExpandThroughStack;
}
1033 | |
/// Return true if the target has native support for the specified value type.
/// This means that it has a register that directly holds it without
/// promotions or expansions.
bool isTypeLegal(EVT VT) const {
  // Extended (non-simple) EVTs are never legal; simple ones are legal iff a
  // register class was registered for them.
  assert(!VT.isSimple() ||
         (unsigned)VT.getSimpleVT().SimpleTy < std::size(RegClassForVT));
  return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
}
1042 | |
1043 | class ValueTypeActionImpl { |
1044 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
1045 | /// that indicates how instruction selection should deal with the type. |
1046 | LegalizeTypeAction ValueTypeActions[MVT::VALUETYPE_SIZE]; |
1047 | |
1048 | public: |
1049 | ValueTypeActionImpl() { |
1050 | std::fill(first: std::begin(arr&: ValueTypeActions), last: std::end(arr&: ValueTypeActions), |
1051 | value: TypeLegal); |
1052 | } |
1053 | |
1054 | LegalizeTypeAction getTypeAction(MVT VT) const { |
1055 | return ValueTypeActions[VT.SimpleTy]; |
1056 | } |
1057 | |
1058 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
1059 | ValueTypeActions[VT.SimpleTy] = Action; |
1060 | } |
1061 | }; |
1062 | |
1063 | const ValueTypeActionImpl &getValueTypeActions() const { |
1064 | return ValueTypeActions; |
1065 | } |
1066 | |
/// Return pair that represents the legalization kind (first) that needs to
/// happen to EVT (second) in order to type-legalize it.
///
/// First: how we should legalize values of this type, either it is already
/// legal (return 'Legal') or we need to promote it to a larger type (return
/// 'Promote'), or we need to expand it into multiple registers of smaller
/// integer type (return 'Expand'). 'Custom' is not an option.
///
/// Second: for types supported by the target, this is an identity function.
/// For types that must be promoted to larger types, this returns the larger
/// type to promote to. For integer types that are larger than the largest
/// integer register, this contains one step in the expansion to get to the
/// smaller register. For illegal floating point types, this returns the
/// integer type to transform to.
/// (Declaration only; defined out of line.)
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
1082 | |
/// Return how we should legalize values of this type, either it is already
/// legal (return 'Legal') or we need to promote it to a larger type (return
/// 'Promote'), or we need to expand it into multiple registers of smaller
/// integer type (return 'Expand'). 'Custom' is not an option.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
  return getTypeConversion(Context, VT).first;
}
/// Fast-path overload for simple types: read the recorded action directly
/// from the table (no conversion computation needed).
LegalizeTypeAction getTypeAction(MVT VT) const {
  return ValueTypeActions.getTypeAction(VT);
}
1093 | |
1094 | /// For types supported by the target, this is an identity function. For |
1095 | /// types that must be promoted to larger types, this returns the larger type |
1096 | /// to promote to. For integer types that are larger than the largest integer |
1097 | /// register, this contains one step in the expansion to get to the smaller |
1098 | /// register. For illegal floating point types, this returns the integer type |
1099 | /// to transform to. |
1100 | virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
1101 | return getTypeConversion(Context, VT).second; |
1102 | } |
1103 | |
/// For types supported by the target, this is an identity function. For
/// types that must be expanded (i.e. integer types that are larger than the
/// largest integer register or illegal floating point types), this returns
/// the largest legal type it will be expanded to.
EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
  assert(!VT.isVector());
  // Follow the expansion chain one step at a time until a legal type is
  // reached; anything other than Legal/ExpandInteger is a caller error.
  while (true) {
    switch (getTypeAction(Context, VT)) {
    case TypeLegal:
      return VT;
    case TypeExpandInteger:
      VT = getTypeToTransformTo(Context, VT);
      break;
    default:
      llvm_unreachable("Type is not legal nor is it to be expanded!" );
    }
  }
}
1122 | |
/// Vector types are broken down into some number of legal first class types.
/// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
/// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
/// turns into 4 EVT::i32 values with both PPC and X86.
///
/// This method returns the number of registers needed, and the VT for each
/// register. It also returns the VT and quantity of the intermediate values
/// before they are promoted/expanded.
/// (Declaration only; defined out of line.)
unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                EVT &IntermediateVT,
                                unsigned &NumIntermediates,
                                MVT &RegisterVT) const;
1135 | |
1136 | /// Certain targets such as MIPS require that some types such as vectors are |
1137 | /// always broken down into scalars in some contexts. This occurs even if the |
1138 | /// vector type is legal. |
1139 | virtual unsigned getVectorTypeBreakdownForCallingConv( |
1140 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
1141 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
1142 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, |
1143 | RegisterVT); |
1144 | } |
1145 | |
/// Memory-access description filled in by getTgtMemIntrinsic() for
/// intrinsics that touch memory.
struct IntrinsicInfo {
  unsigned opc = 0;  // target opcode
  EVT memVT;         // memory VT

  // value representing memory location
  PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;

  // Fallback address space for use if ptrVal is nullptr. std::nullopt means
  // unknown address space.
  std::optional<unsigned> fallbackAddressSpace;

  int offset = 0;        // offset off of ptrVal
  uint64_t size = 0;     // the size of the memory location
                         // (taken from memVT if zero)
  MaybeAlign align = Align(1); // alignment

  MachineMemOperand::Flags flags = MachineMemOperand::MONone;
  IntrinsicInfo() = default;
};
1165 | |
1166 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
1167 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
1168 | /// true and store the intrinsic information into the IntrinsicInfo that was |
1169 | /// passed to the function. |
1170 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
1171 | MachineFunction &, |
1172 | unsigned /*Intrinsic*/) const { |
1173 | return false; |
1174 | } |
1175 | |
1176 | /// Returns true if the target can instruction select the specified FP |
1177 | /// immediate natively. If false, the legalizer will materialize the FP |
1178 | /// immediate as a load from a constant pool. |
1179 | virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, |
1180 | bool ForCodeSize = false) const { |
1181 | return false; |
1182 | } |
1183 | |
1184 | /// Targets can use this to indicate that they only support *some* |
1185 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
1186 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be |
1187 | /// legal. |
1188 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { |
1189 | return true; |
1190 | } |
1191 | |
/// Returns true if the operation can trap for the value type.
///
/// VT must be a legal type. By default, we optimistically assume most
/// operations don't trap except for integer divide and remainder.
/// (Declaration only; defined out of line.)
virtual bool canOpTrap(unsigned Op, EVT VT) const;
1197 | |
1198 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
1199 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
1200 | /// constant pool entry. |
1201 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, |
1202 | EVT /*VT*/) const { |
1203 | return false; |
1204 | } |
1205 | |
1206 | /// How to legalize this custom operation? |
1207 | virtual LegalizeAction getCustomOperationAction(SDNode &Op) const { |
1208 | return Legal; |
1209 | } |
1210 | |
/// Return how this operation should be treated: either it is legal, needs to
/// be promoted to a larger size, needs to be expanded to some other code
/// sequence, or the target has a custom expander for it.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
  // Extended (non-simple) types always need expansion.
  if (VT.isExtended()) return Expand;
  // If a target-specific SDNode requires legalization, require the target
  // to provide custom legalization for it.
  if (Op >= std::size(OpActions[0]))
    return Custom;
  return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
}
1222 | |
1223 | /// Custom method defined by each target to indicate if an operation which |
1224 | /// may require a scale is supported natively by the target. |
1225 | /// If not, the operation is illegal. |
1226 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, |
1227 | unsigned Scale) const { |
1228 | return false; |
1229 | } |
1230 | |
/// Some fixed point operations may be natively supported by the target but
/// only for specific scales. This method allows for checking
/// if the width is supported by the target for a given operation that may
/// depend on scale.
LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
                                            unsigned Scale) const {
  // If the op isn't Legal for this type, the scale doesn't matter.
  auto Action = getOperationAction(Op, VT);
  if (Action != Legal)
    return Action;

  // This operation is supported in this type but may only work on specific
  // scales.
  bool Supported;
  switch (Op) {
  default:
    llvm_unreachable("Unexpected fixed point operation." );
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT:
    Supported = isSupportedFixedPointOperation(Op, VT, Scale);
    break;
  }

  // Unsupported scales fall back to expansion.
  return Supported ? Action : Expand;
}
1261 | |
1262 | // If Op is a strict floating-point operation, return the result |
1263 | // of getOperationAction for the equivalent non-strict operation. |
1264 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
1265 | unsigned EqOpc; |
1266 | switch (Op) { |
1267 | default: llvm_unreachable("Unexpected FP pseudo-opcode" ); |
1268 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1269 | case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; |
1270 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
1271 | case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; |
1272 | #include "llvm/IR/ConstrainedOps.def" |
1273 | } |
1274 | |
1275 | return getOperationAction(Op: EqOpc, VT); |
1276 | } |
1277 | |
1278 | /// Return true if the specified operation is legal on this target or can be |
1279 | /// made legal with custom lowering. This is used to help guide high-level |
1280 | /// lowering decisions. LegalOnly is an optional convenience for code paths |
1281 | /// traversed pre and post legalisation. |
1282 | bool isOperationLegalOrCustom(unsigned Op, EVT VT, |
1283 | bool LegalOnly = false) const { |
1284 | if (LegalOnly) |
1285 | return isOperationLegal(Op, VT); |
1286 | |
1287 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1288 | (getOperationAction(Op, VT) == Legal || |
1289 | getOperationAction(Op, VT) == Custom); |
1290 | } |
1291 | |
1292 | /// Return true if the specified operation is legal on this target or can be |
1293 | /// made legal using promotion. This is used to help guide high-level lowering |
1294 | /// decisions. LegalOnly is an optional convenience for code paths traversed |
1295 | /// pre and post legalisation. |
1296 | bool isOperationLegalOrPromote(unsigned Op, EVT VT, |
1297 | bool LegalOnly = false) const { |
1298 | if (LegalOnly) |
1299 | return isOperationLegal(Op, VT); |
1300 | |
1301 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1302 | (getOperationAction(Op, VT) == Legal || |
1303 | getOperationAction(Op, VT) == Promote); |
1304 | } |
1305 | |
1306 | /// Return true if the specified operation is legal on this target or can be |
1307 | /// made legal with custom lowering or using promotion. This is used to help |
1308 | /// guide high-level lowering decisions. LegalOnly is an optional convenience |
1309 | /// for code paths traversed pre and post legalisation. |
1310 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, |
1311 | bool LegalOnly = false) const { |
1312 | if (LegalOnly) |
1313 | return isOperationLegal(Op, VT); |
1314 | |
1315 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1316 | (getOperationAction(Op, VT) == Legal || |
1317 | getOperationAction(Op, VT) == Custom || |
1318 | getOperationAction(Op, VT) == Promote); |
1319 | } |
1320 | |
1321 | /// Return true if the operation uses custom lowering, regardless of whether |
1322 | /// the type is legal or not. |
1323 | bool isOperationCustom(unsigned Op, EVT VT) const { |
1324 | return getOperationAction(Op, VT) == Custom; |
1325 | } |
1326 | |
1327 | /// Return true if lowering to a jump table is allowed. |
1328 | virtual bool areJTsAllowed(const Function *Fn) const { |
1329 | if (Fn->getFnAttribute(Kind: "no-jump-tables" ).getValueAsBool()) |
1330 | return false; |
1331 | |
1332 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
1333 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
1334 | } |
1335 | |
1336 | /// Check whether the range [Low,High] fits in a machine word. |
1337 | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
1338 | const DataLayout &DL) const { |
1339 | // FIXME: Using the pointer type doesn't seem ideal. |
1340 | uint64_t BW = DL.getIndexSizeInBits(AS: 0u); |
1341 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; |
1342 | return Range <= BW; |
1343 | } |
1344 | |
/// Return true if lowering to a jump table is suitable for a set of case
/// clusters which may contain \p NumCases cases, \p Range range of values.
/// (Declaration only; defined out of line.)
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
                                    uint64_t Range, ProfileSummaryInfo *PSI,
                                    BlockFrequencyInfo *BFI) const;

/// Returns preferred type for switch condition.
/// (Declaration only; defined out of line.)
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                            EVT ConditionVT) const;
1354 | |
/// Return true if lowering to a bit test is suitable for a set of case
/// clusters which contains \p NumDests unique destinations, \p Low and
/// \p High as its lowest and highest case values, and expects \p NumCmps
/// case value comparisons. Check if the number of destinations, comparison
/// metric, and range are all suitable.
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
                           const APInt &Low, const APInt &High,
                           const DataLayout &DL) const {
  // FIXME: I don't think NumCmps is the correct metric: a single case and a
  // range of cases both require only one branch to lower. Just looking at the
  // number of clusters and destinations should be enough to decide whether to
  // build bit tests.

  // To lower a range with bit tests, the range must fit the bitwidth of a
  // machine word.
  if (!rangeFitsInWord(Low, High, DL))
    return false;

  // Decide whether it's profitable to lower this range with bit tests. Each
  // destination requires a bit test and branch, and there is an overall range
  // check branch. For a small number of clusters, separate comparisons might
  // be cheaper, and for many destinations, splitting the range might be
  // better.
  return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
         (NumDests == 3 && NumCmps >= 6);
}
1381 | |
1382 | /// Return true if the specified operation is illegal on this target or |
1383 | /// unlikely to be made legal with custom lowering. This is used to help guide |
1384 | /// high-level lowering decisions. |
1385 | bool isOperationExpand(unsigned Op, EVT VT) const { |
1386 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
1387 | } |
1388 | |
1389 | /// Return true if the specified operation is legal on this target. |
1390 | bool isOperationLegal(unsigned Op, EVT VT) const { |
1391 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1392 | getOperationAction(Op, VT) == Legal; |
1393 | } |
1394 | |
/// Return how this load with extension should be treated: either it is legal,
/// needs to be promoted to a larger size, needs to be expanded to some other
/// code sequence, or the target has a custom expander for it.
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
                               EVT MemVT) const {
  // Extended (non-simple) types always need expansion.
  if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
  unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
  unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
  assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::VALUETYPE_SIZE &&
         MemI < MVT::VALUETYPE_SIZE && "Table isn't big enough!" );
  // Actions are packed 4 bits per extension type in each table entry.
  unsigned Shift = 4 * ExtType;
  return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
}
1408 | |
1409 | /// Return true if the specified load with extension is legal on this target. |
1410 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1411 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1412 | } |
1413 | |
1414 | /// Return true if the specified load with extension is legal or custom |
1415 | /// on this target. |
1416 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1417 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
1418 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
1419 | } |
1420 | |
1421 | /// Return how this store with truncation should be treated: either it is |
1422 | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
1423 | /// other code sequence, or the target has a custom expander for it. |
1424 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
1425 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1426 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1427 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1428 | assert(ValI < MVT::VALUETYPE_SIZE && MemI < MVT::VALUETYPE_SIZE && |
1429 | "Table isn't big enough!" ); |
1430 | return TruncStoreActions[ValI][MemI]; |
1431 | } |
1432 | |
1433 | /// Return true if the specified store with truncation is legal on this |
1434 | /// target. |
1435 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
1436 | return isTypeLegal(VT: ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; |
1437 | } |
1438 | |
1439 | /// Return true if the specified store with truncation has solution on this |
1440 | /// target. |
1441 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
1442 | return isTypeLegal(VT: ValVT) && |
1443 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
1444 | getTruncStoreAction(ValVT, MemVT) == Custom); |
1445 | } |
1446 | |
1447 | virtual bool canCombineTruncStore(EVT ValVT, EVT MemVT, |
1448 | bool LegalOnly) const { |
1449 | if (LegalOnly) |
1450 | return isTruncStoreLegal(ValVT, MemVT); |
1451 | |
1452 | return isTruncStoreLegalOrCustom(ValVT, MemVT); |
1453 | } |
1454 | |
1455 | /// Return how the indexed load should be treated: either it is legal, needs |
1456 | /// to be promoted to a larger size, needs to be expanded to some other code |
1457 | /// sequence, or the target has a custom expander for it. |
1458 | LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
1459 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Load); |
1460 | } |
1461 | |
1462 | /// Return true if the specified indexed load is legal on this target. |
1463 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
1464 | return VT.isSimple() && |
1465 | (getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1466 | getIndexedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1467 | } |
1468 | |
1469 | /// Return how the indexed store should be treated: either it is legal, needs |
1470 | /// to be promoted to a larger size, needs to be expanded to some other code |
1471 | /// sequence, or the target has a custom expander for it. |
1472 | LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
1473 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_Store); |
1474 | } |
1475 | |
1476 | /// Return true if the specified indexed load is legal on this target. |
1477 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
1478 | return VT.isSimple() && |
1479 | (getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1480 | getIndexedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1481 | } |
1482 | |
1483 | /// Return how the indexed load should be treated: either it is legal, needs |
1484 | /// to be promoted to a larger size, needs to be expanded to some other code |
1485 | /// sequence, or the target has a custom expander for it. |
1486 | LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { |
1487 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad); |
1488 | } |
1489 | |
1490 | /// Return true if the specified indexed load is legal on this target. |
1491 | bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { |
1492 | return VT.isSimple() && |
1493 | (getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1494 | getIndexedMaskedLoadAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1495 | } |
1496 | |
1497 | /// Return how the indexed store should be treated: either it is legal, needs |
1498 | /// to be promoted to a larger size, needs to be expanded to some other code |
1499 | /// sequence, or the target has a custom expander for it. |
1500 | LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { |
1501 | return getIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore); |
1502 | } |
1503 | |
1504 | /// Return true if the specified indexed load is legal on this target. |
1505 | bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { |
1506 | return VT.isSimple() && |
1507 | (getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Legal || |
1508 | getIndexedMaskedStoreAction(IdxMode, VT: VT.getSimpleVT()) == Custom); |
1509 | } |
1510 | |
  /// Returns true if the index type for a masked gather/scatter requires
  /// extending. \p EltTy appears to be an out-parameter for the element type
  /// to extend to — confirm against overriding targets. Default: no extension.
  virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
1514 | |
  /// Returns true if \p Extend can be folded into the index of a masked
  /// gather/scatter with data type \p DataVT on this target. Default: never.
  virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
    return false;
  }
1520 | |
1521 | // Return true if the target supports a scatter/gather instruction with |
1522 | // indices which are scaled by the particular value. Note that all targets |
1523 | // must by definition support scale of 1. |
1524 | virtual bool isLegalScaleForGatherScatter(uint64_t Scale, |
1525 | uint64_t ElemSize) const { |
1526 | // MGATHER/MSCATTER are only required to support scaling by one or by the |
1527 | // element size. |
1528 | if (Scale != ElemSize && Scale != 1) |
1529 | return false; |
1530 | return true; |
1531 | } |
1532 | |
  /// Return how the condition code should be treated: either it is legal, needs
  /// to be expanded to some other code sequence, or the target has a custom
  /// expander for it.
  LegalizeAction
  getCondCodeAction(ISD::CondCode CC, MVT VT) const {
    assert((unsigned)CC < std::size(CondCodeActions) &&
           ((unsigned)VT.SimpleTy >> 3) < std::size(CondCodeActions[0]) &&
           "Table isn't big enough!" );
    // See setCondCodeAction for how this is encoded: eight value types share
    // one 32-bit table entry, four bits of action per type.
    uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
    uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
    LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
    // Promote is not a meaningful action for a condition code.
    assert(Action != Promote && "Can't promote condition code!" );
    return Action;
  }
1548 | |
1549 | /// Return true if the specified condition code is legal on this target. |
1550 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
1551 | return getCondCodeAction(CC, VT) == Legal; |
1552 | } |
1553 | |
1554 | /// Return true if the specified condition code is legal or custom on this |
1555 | /// target. |
1556 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
1557 | return getCondCodeAction(CC, VT) == Legal || |
1558 | getCondCodeAction(CC, VT) == Custom; |
1559 | } |
1560 | |
1561 | /// If the action for this operation is to promote, this method returns the |
1562 | /// ValueType to promote to. |
1563 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
1564 | assert(getOperationAction(Op, VT) == Promote && |
1565 | "This operation isn't promoted!" ); |
1566 | |
1567 | // See if this has an explicit type specified. |
1568 | std::map<std::pair<unsigned, MVT::SimpleValueType>, |
1569 | MVT::SimpleValueType>::const_iterator PTTI = |
1570 | PromoteToType.find(x: std::make_pair(x&: Op, y&: VT.SimpleTy)); |
1571 | if (PTTI != PromoteToType.end()) return PTTI->second; |
1572 | |
1573 | assert((VT.isInteger() || VT.isFloatingPoint()) && |
1574 | "Cannot autopromote this type, add it with AddPromotedToType." ); |
1575 | |
1576 | MVT NVT = VT; |
1577 | do { |
1578 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
1579 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && |
1580 | "Didn't find type to promote to!" ); |
1581 | } while (!isTypeLegal(VT: NVT) || |
1582 | getOperationAction(Op, VT: NVT) == Promote); |
1583 | return NVT; |
1584 | } |
1585 | |
  /// Return the EVT to use for an inline-asm operand of IR type \p Ty.
  /// Defaults to getValueType; virtual so targets can override.
  virtual EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
                                     bool AllowUnknown = false) const {
    return getValueType(DL, Ty, AllowUnknown);
  }
1590 | |
1591 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1592 | /// operations except for the pointer size. If AllowUnknown is true, this |
1593 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1594 | /// otherwise it will assert. |
1595 | EVT getValueType(const DataLayout &DL, Type *Ty, |
1596 | bool AllowUnknown = false) const { |
1597 | // Lower scalar pointers to native pointer types. |
1598 | if (auto *PTy = dyn_cast<PointerType>(Val: Ty)) |
1599 | return getPointerTy(DL, AS: PTy->getAddressSpace()); |
1600 | |
1601 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
1602 | Type *EltTy = VTy->getElementType(); |
1603 | // Lower vectors of pointers to native pointer types. |
1604 | if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) { |
1605 | EVT PointerTy(getPointerTy(DL, AS: PTy->getAddressSpace())); |
1606 | EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext()); |
1607 | } |
1608 | return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false), |
1609 | EC: VTy->getElementCount()); |
1610 | } |
1611 | |
1612 | return EVT::getEVT(Ty, HandleUnknown: AllowUnknown); |
1613 | } |
1614 | |
1615 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
1616 | bool AllowUnknown = false) const { |
1617 | // Lower scalar pointers to native pointer types. |
1618 | if (auto *PTy = dyn_cast<PointerType>(Val: Ty)) |
1619 | return getPointerMemTy(DL, AS: PTy->getAddressSpace()); |
1620 | |
1621 | if (auto *VTy = dyn_cast<VectorType>(Val: Ty)) { |
1622 | Type *EltTy = VTy->getElementType(); |
1623 | if (auto *PTy = dyn_cast<PointerType>(Val: EltTy)) { |
1624 | EVT PointerTy(getPointerMemTy(DL, AS: PTy->getAddressSpace())); |
1625 | EltTy = PointerTy.getTypeForEVT(Context&: Ty->getContext()); |
1626 | } |
1627 | return EVT::getVectorVT(Context&: Ty->getContext(), VT: EVT::getEVT(Ty: EltTy, HandleUnknown: false), |
1628 | EC: VTy->getElementCount()); |
1629 | } |
1630 | |
1631 | return getValueType(DL, Ty, AllowUnknown); |
1632 | } |
1633 | |
1634 | |
  /// Return the MVT corresponding to this LLVM type. See getValueType.
  /// The resulting EVT must map to a simple (MVT) type.
  MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
                         bool AllowUnknown = false) const {
    return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
  }
1640 | |
1641 | /// Return the desired alignment for ByVal or InAlloca aggregate function |
1642 | /// arguments in the caller parameter area. This is the actual alignment, not |
1643 | /// its logarithm. |
1644 | virtual uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
1645 | |
  /// Return the type of registers that this ValueType will eventually require.
  /// Simple lookup in the per-VT table computed by the target.
  MVT getRegisterType(MVT VT) const {
    assert((unsigned)VT.SimpleTy < std::size(RegisterTypeForVT));
    return RegisterTypeForVT[VT.SimpleTy];
  }
1651 | |
1652 | /// Return the type of registers that this ValueType will eventually require. |
1653 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
1654 | if (VT.isSimple()) |
1655 | return getRegisterType(VT: VT.getSimpleVT()); |
1656 | if (VT.isVector()) { |
1657 | EVT VT1; |
1658 | MVT RegisterVT; |
1659 | unsigned NumIntermediates; |
1660 | (void)getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1, |
1661 | NumIntermediates, RegisterVT); |
1662 | return RegisterVT; |
1663 | } |
1664 | if (VT.isInteger()) { |
1665 | return getRegisterType(Context, VT: getTypeToTransformTo(Context, VT)); |
1666 | } |
1667 | llvm_unreachable("Unsupported extended type!" ); |
1668 | } |
1669 | |
1670 | /// Return the number of registers that this ValueType will eventually |
1671 | /// require. |
1672 | /// |
1673 | /// This is one for any types promoted to live in larger registers, but may be |
1674 | /// more than one for types (like i64) that are split into pieces. For types |
1675 | /// like i140, which are first promoted then expanded, it is the number of |
1676 | /// registers needed to hold all the bits of the original type. For an i140 |
1677 | /// on a 32 bit machine this means 5 registers. |
1678 | /// |
1679 | /// RegisterVT may be passed as a way to override the default settings, for |
1680 | /// instance with i128 inline assembly operands on SystemZ. |
1681 | virtual unsigned |
1682 | getNumRegisters(LLVMContext &Context, EVT VT, |
1683 | std::optional<MVT> RegisterVT = std::nullopt) const { |
1684 | if (VT.isSimple()) { |
1685 | assert((unsigned)VT.getSimpleVT().SimpleTy < |
1686 | std::size(NumRegistersForVT)); |
1687 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
1688 | } |
1689 | if (VT.isVector()) { |
1690 | EVT VT1; |
1691 | MVT VT2; |
1692 | unsigned NumIntermediates; |
1693 | return getVectorTypeBreakdown(Context, VT, IntermediateVT&: VT1, NumIntermediates, RegisterVT&: VT2); |
1694 | } |
1695 | if (VT.isInteger()) { |
1696 | unsigned BitWidth = VT.getSizeInBits(); |
1697 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
1698 | return (BitWidth + RegWidth - 1) / RegWidth; |
1699 | } |
1700 | llvm_unreachable("Unsupported extended type!" ); |
1701 | } |
1702 | |
  /// Certain combinations of ABIs, Targets and features require that types
  /// are legal for some operations and not for other operations.
  /// For MIPS all vector types must be passed through the integer register set.
  /// The default implementation ignores the calling convention.
  virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
                                            CallingConv::ID CC, EVT VT) const {
    return getRegisterType(Context, VT);
  }
1710 | |
  /// Certain targets require unusual breakdowns of certain types. For MIPS,
  /// this occurs when a vector type is used, as vector are passed through the
  /// integer register set.
  /// The default implementation ignores the calling convention.
  virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                                 CallingConv::ID CC,
                                                 EVT VT) const {
    return getNumRegisters(Context, VT);
  }
1719 | |
1720 | /// Certain targets have context sensitive alignment requirements, where one |
1721 | /// type has the alignment requirement of another type. |
1722 | virtual Align getABIAlignmentForCallingConv(Type *ArgTy, |
1723 | const DataLayout &DL) const { |
1724 | return DL.getABITypeAlign(Ty: ArgTy); |
1725 | } |
1726 | |
  /// If true, then instruction selection should seek to shrink the FP constant
  /// of the specified type to a smaller type in order to save space and / or
  /// reduce runtime. Default: always shrink.
  virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1731 | |
1732 | /// Return true if it is profitable to reduce a load to a smaller type. |
1733 | /// Example: (i16 (trunc (i32 (load x))) -> i16 load x |
1734 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
1735 | EVT NewVT) const { |
1736 | // By default, assume that it is cheaper to extract a subvector from a wide |
1737 | // vector load rather than creating multiple narrow vector loads. |
1738 | if (NewVT.isVector() && !Load->hasOneUse()) |
1739 | return false; |
1740 | |
1741 | return true; |
1742 | } |
1743 | |
  /// Return true (the default) if it is profitable to remove a sext_inreg(x)
  /// where the sext is redundant, and use x directly.
  virtual bool shouldRemoveRedundantExtend(SDValue Op) const { return true; }
1747 | |
  /// When splitting a value of the specified type into parts, does the Lo
  /// or Hi part come first? This usually follows the endianness, except
  /// for ppcf128, where the Hi part always comes first.
  /// Returns true when the Hi part comes first.
  bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
    return DL.isBigEndian() || VT == MVT::ppcf128;
  }
1754 | |
  /// If true, the target has custom DAG combine transformations that it can
  /// perform for the specified node.
  bool hasTargetDAGCombine(ISD::NodeType NT) const {
    assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray));
    // One bit per node type, packed eight per array element.
    return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
  }
1761 | |
  /// Accessor for the GatherAllAliasesMaxDepth field — the depth cutoff used
  /// by DAG alias analysis when gathering all aliases of a node.
  unsigned getGatherAllAliasesMaxDepth() const {
    return GatherAllAliasesMaxDepth;
  }
1765 | |
  /// Returns the size of the platform's va_list object.
  /// The default assumes va_list is a single pointer.
  virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
    return getPointerTy(DL).getSizeInBits();
  }
1770 | |
1771 | /// Get maximum # of store operations permitted for llvm.memset |
1772 | /// |
1773 | /// This function returns the maximum number of store operations permitted |
1774 | /// to replace a call to llvm.memset. The value is set by the target at the |
1775 | /// performance threshold for such a replacement. If OptSize is true, |
1776 | /// return the limit for functions that have OptSize attribute. |
1777 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
1778 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
1779 | } |
1780 | |
1781 | /// Get maximum # of store operations permitted for llvm.memcpy |
1782 | /// |
1783 | /// This function returns the maximum number of store operations permitted |
1784 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
1785 | /// performance threshold for such a replacement. If OptSize is true, |
1786 | /// return the limit for functions that have OptSize attribute. |
1787 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
1788 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
1789 | } |
1790 | |
  /// \brief Get maximum # of store operations to be glued together
  ///
  /// This function returns the maximum number of store operations permitted
  /// to glue together during lowering of llvm.memcpy. The value is set by
  /// the target at the performance threshold for such a replacement.
  virtual unsigned getMaxGluedStoresPerMemcpy() const {
    return MaxGluedStoresPerMemcpy;
  }
1799 | |
1800 | /// Get maximum # of load operations permitted for memcmp |
1801 | /// |
1802 | /// This function returns the maximum number of load operations permitted |
1803 | /// to replace a call to memcmp. The value is set by the target at the |
1804 | /// performance threshold for such a replacement. If OptSize is true, |
1805 | /// return the limit for functions that have OptSize attribute. |
1806 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
1807 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
1808 | } |
1809 | |
1810 | /// Get maximum # of store operations permitted for llvm.memmove |
1811 | /// |
1812 | /// This function returns the maximum number of store operations permitted |
1813 | /// to replace a call to llvm.memmove. The value is set by the target at the |
1814 | /// performance threshold for such a replacement. If OptSize is true, |
1815 | /// return the limit for functions that have OptSize attribute. |
1816 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
1817 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
1818 | } |
1819 | |
  /// Determine if the target supports unaligned memory accesses.
  ///
  /// This function returns true if the target allows unaligned memory accesses
  /// of the specified type in the given address space. If true, it also returns
  /// a relative speed of the unaligned memory access in the last argument by
  /// reference. The higher the speed number the faster the operation comparing
  /// to a number returned by another such call. This is used, for example, in
  /// situations where an array copy/move/set is converted to a sequence of
  /// store operations. Its use helps to ensure that such replacements don't
  /// generate code that causes an alignment error (trap) on the target machine.
  /// Default: misaligned accesses are not allowed.
  virtual bool allowsMisalignedMemoryAccesses(
      EVT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned * /*Fast*/ = nullptr) const {
    return false;
  }
1836 | |
  /// LLT handling variant of the EVT overload above (GlobalISel).
  /// Default: misaligned accesses are not allowed.
  virtual bool allowsMisalignedMemoryAccesses(
      LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
      MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
      unsigned * /*Fast*/ = nullptr) const {
    return false;
  }
1844 | |
1845 | /// This function returns true if the memory access is aligned or if the |
1846 | /// target allows this specific unaligned memory access. If the access is |
1847 | /// allowed, the optional final parameter returns a relative speed of the |
1848 | /// access (as defined by the target). |
1849 | bool allowsMemoryAccessForAlignment( |
1850 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1851 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
1852 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1853 | unsigned *Fast = nullptr) const; |
1854 | |
1855 | /// Return true if the memory access of this type is aligned or if the target |
1856 | /// allows this specific unaligned access for the given MachineMemOperand. |
1857 | /// If the access is allowed, the optional final parameter returns a relative |
1858 | /// speed of the access (as defined by the target). |
1859 | bool allowsMemoryAccessForAlignment(LLVMContext &Context, |
1860 | const DataLayout &DL, EVT VT, |
1861 | const MachineMemOperand &MMO, |
1862 | unsigned *Fast = nullptr) const; |
1863 | |
1864 | /// Return true if the target supports a memory access of this type for the |
1865 | /// given address space and alignment. If the access is allowed, the optional |
1866 | /// final parameter returns the relative speed of the access (as defined by |
1867 | /// the target). |
1868 | virtual bool |
1869 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1870 | unsigned AddrSpace = 0, Align Alignment = Align(1), |
1871 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1872 | unsigned *Fast = nullptr) const; |
1873 | |
1874 | /// Return true if the target supports a memory access of this type for the |
1875 | /// given MachineMemOperand. If the access is allowed, the optional |
1876 | /// final parameter returns the relative access speed (as defined by the |
1877 | /// target). |
1878 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1879 | const MachineMemOperand &MMO, |
1880 | unsigned *Fast = nullptr) const; |
1881 | |
1882 | /// LLT handling variant. |
1883 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, LLT Ty, |
1884 | const MachineMemOperand &MMO, |
1885 | unsigned *Fast = nullptr) const; |
1886 | |
  /// Returns the target specific optimal type for load and store operations as
  /// a result of memset, memcpy, and memmove lowering.
  /// It returns EVT::Other if the type should be determined using generic
  /// target-independent logic (the default).
  virtual EVT
  getOptimalMemOpType(const MemOp &Op,
                      const AttributeList & /*FuncAttributes*/) const {
    return MVT::Other;
  }
1896 | |
  /// LLT returning variant of getOptimalMemOpType (GlobalISel).
  /// A default-constructed (invalid) LLT means "no preference".
  virtual LLT
  getOptimalMemOpLLT(const MemOp &Op,
                     const AttributeList & /*FuncAttributes*/) const {
    return LLT();
  }
1903 | |
  /// Returns true if it's safe to use load / store of the specified type to
  /// expand memcpy / memset inline.
  ///
  /// This is mostly true for all types except for some special cases. For
  /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
  /// fstpl which also does type conversion. Note the specified type doesn't
  /// have to be legal as the hook is used before type legalization.
  virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1912 | |
1913 | /// Return lower limit for number of blocks in a jump table. |
1914 | virtual unsigned getMinimumJumpTableEntries() const; |
1915 | |
1916 | /// Return lower limit of the density in a jump table. |
1917 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
1918 | |
1919 | /// Return upper limit for number of entries in a jump table. |
1920 | /// Zero if no limit. |
1921 | unsigned getMaximumJumpTableSize() const; |
1922 | |
1923 | virtual bool isJumpTableRelative() const; |
1924 | |
  /// If a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
  /// (Comment previously misspelled the intrinsic names.)
  Register getStackPointerRegisterToSaveRestore() const {
    return StackPointerRegisterToSaveRestore;
  }
1930 | |
  /// If a physical register, this returns the register that receives the
  /// exception address on entry to an EH pad.
  /// Default: an invalid (null) Register.
  virtual Register
  getExceptionPointerRegister(const Constant *PersonalityFn) const {
    return Register();
  }
1937 | |
  /// If a physical register, this returns the register that receives the
  /// exception typeid on entry to a landing pad.
  /// Default: an invalid (null) Register.
  virtual Register
  getExceptionSelectorRegister(const Constant *PersonalityFn) const {
    return Register();
  }
1944 | |
1945 | virtual bool needsFixedCatchObjects() const { |
1946 | report_fatal_error(reason: "Funclet EH is not implemented for this target" ); |
1947 | } |
1948 | |
  /// Return the minimum stack alignment of an argument.
  /// Simple accessor for the MinStackArgumentAlignment field.
  Align getMinStackArgumentAlignment() const {
    return MinStackArgumentAlignment;
  }
1953 | |
  /// Return the minimum function alignment (accessor for the
  /// MinFunctionAlignment field).
  Align getMinFunctionAlignment() const { return MinFunctionAlignment; }

  /// Return the preferred function alignment (accessor for the
  /// PrefFunctionAlignment field).
  Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1959 | |
1960 | /// Return the preferred loop alignment. |
1961 | virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const; |
1962 | |
1963 | /// Return the maximum amount of bytes allowed to be emitted when padding for |
1964 | /// alignment |
1965 | virtual unsigned |
1966 | getMaxPermittedBytesForAlignment(MachineBasicBlock *MBB) const; |
1967 | |
  /// Should loops be aligned even when the function is marked OptSize (but not
  /// MinSize). Default: no.
  virtual bool alignLoopsWithOptSize() const { return false; }
1971 | |
1972 | /// If the target has a standard location for the stack protector guard, |
1973 | /// returns the address of that location. Otherwise, returns nullptr. |
1974 | /// DEPRECATED: please override useLoadStackGuardNode and customize |
1975 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
1976 | virtual Value *getIRStackGuard(IRBuilderBase &IRB) const; |
1977 | |
1978 | /// Inserts necessary declarations for SSP (stack protection) purpose. |
1979 | /// Should be used only when getIRStackGuard returns nullptr. |
1980 | virtual void insertSSPDeclarations(Module &M) const; |
1981 | |
1982 | /// Return the variable that's previously inserted by insertSSPDeclarations, |
1983 | /// if any, otherwise return nullptr. Should be used only when |
1984 | /// getIRStackGuard returns nullptr. |
1985 | virtual Value *getSDagStackGuard(const Module &M) const; |
1986 | |
1987 | /// If this function returns true, stack protection checks should XOR the |
1988 | /// frame pointer (or whichever pointer is used to address locals) into the |
1989 | /// stack guard value before checking it. getIRStackGuard must return nullptr |
1990 | /// if this returns true. |
1991 | virtual bool useStackGuardXorFP() const { return false; } |
1992 | |
1993 | /// If the target has a standard stack protection check function that |
1994 | /// performs validation and error handling, returns the function. Otherwise, |
1995 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
1996 | /// Should be used only when getIRStackGuard returns nullptr. |
1997 | virtual Function *getSSPStackGuardCheck(const Module &M) const; |
1998 | |
1999 | protected: |
2000 | Value *getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
2001 | bool UseTLS) const; |
2002 | |
2003 | public: |
2004 | /// Returns the target-specific address of the unsafe stack pointer. |
2005 | virtual Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const; |
2006 | |
  /// Returns true if the target uses a symbol for stack probing (see
  /// getStackProbeSymbolName).
  virtual bool hasStackProbeSymbol(const MachineFunction &MF) const { return false; }

  /// Returns true if the target emits stack probes inline.
  virtual bool hasInlineStackProbe(const MachineFunction &MF) const { return false; }

  /// Returns the name of the symbol used to emit stack probes or the empty
  /// string if not applicable. (This comment was previously misplaced on
  /// hasStackProbeSymbol.)
  virtual StringRef getStackProbeSymbolName(const MachineFunction &MF) const {
    return "" ;
  }
2016 | |
2017 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
2018 | /// are happy to sink it into basic blocks. A cast may be free, but not |
2019 | /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. |
2020 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; |
2021 | |
  /// Return true if the pointer arguments to CI should be aligned by aligning
  /// the object whose address is being passed. If so then MinSize is set to the
  /// minimum size the object must be to be aligned and PrefAlign is set to the
  /// preferred alignment. Default: no realignment requested.
  virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
                                      Align & /*PrefAlign*/) const {
    return false;
  }
2030 | |
2031 | //===--------------------------------------------------------------------===// |
2032 | /// \name Helpers for TargetTransformInfo implementations |
2033 | /// @{ |
2034 | |
2035 | /// Get the ISD node that corresponds to the Instruction class opcode. |
2036 | int InstructionOpcodeToISD(unsigned Opcode) const; |
2037 | |
2038 | /// @} |
2039 | |
2040 | //===--------------------------------------------------------------------===// |
2041 | /// \name Helpers for atomic expansion. |
2042 | /// @{ |
2043 | |
  /// Returns the maximum atomic operation size (in bits) supported by
  /// the backend. Atomic operations greater than this size (as well
  /// as ones that are not naturally aligned), will be expanded by
  /// AtomicExpandPass into an __atomic_* library call.
  unsigned getMaxAtomicSizeInBitsSupported() const {
    return MaxAtomicSizeInBitsSupported;
  }
2051 | |
2052 | /// Returns the size in bits of the maximum div/rem the backend supports. |
2053 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2054 | unsigned getMaxDivRemBitWidthSupported() const { |
2055 | return MaxDivRemBitWidthSupported; |
2056 | } |
2057 | |
2058 | /// Returns the size in bits of the maximum larget fp convert the backend |
2059 | /// supports. Larger operations will be expanded by ExpandLargeFPConvert. |
2060 | unsigned getMaxLargeFPConvertBitWidthSupported() const { |
2061 | return MaxLargeFPConvertBitWidthSupported; |
2062 | } |
2063 | |
2064 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
2065 | /// the backend supports. Any smaller operations are widened in |
2066 | /// AtomicExpandPass. |
2067 | /// |
2068 | /// Note that *unlike* operations above the maximum size, atomic ops |
2069 | /// are still natively supported below the minimum; they just |
2070 | /// require a more complex expansion. |
2071 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
2072 | |
2073 | /// Whether the target supports unaligned atomic operations. |
2074 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
2075 | |
2076 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
2077 | /// ordering for this atomic. This should be true for most architectures with |
2078 | /// weak memory ordering. Defaults to false. |
2079 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
2080 | return false; |
2081 | } |
2082 | |
2083 | /// Whether AtomicExpandPass should automatically insert a trailing fence |
2084 | /// without reducing the ordering for this atomic. Defaults to false. |
2085 | virtual bool |
2086 | shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const { |
2087 | return false; |
2088 | } |
2089 | |
2090 | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
2091 | /// corresponding pointee type. This may entail some non-trivial operations to |
2092 | /// truncate or reconstruct types that will be illegal in the backend. See |
2093 | /// ARMISelLowering for an example implementation. |
2094 | virtual Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, |
2095 | Value *Addr, AtomicOrdering Ord) const { |
2096 | llvm_unreachable("Load linked unimplemented on this target" ); |
2097 | } |
2098 | |
2099 | /// Perform a store-conditional operation to Addr. Return the status of the |
2100 | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
2101 | virtual Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, |
2102 | Value *Addr, AtomicOrdering Ord) const { |
2103 | llvm_unreachable("Store conditional unimplemented on this target" ); |
2104 | } |
2105 | |
2106 | /// Perform a masked atomicrmw using a target-specific intrinsic. This |
2107 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2108 | /// the backend. The target-specific intrinsic returns the loaded value and |
2109 | /// is not responsible for masking and shifting the result. |
2110 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, |
2111 | AtomicRMWInst *AI, |
2112 | Value *AlignedAddr, Value *Incr, |
2113 | Value *Mask, Value *ShiftAmt, |
2114 | AtomicOrdering Ord) const { |
2115 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target" ); |
2116 | } |
2117 | |
2118 | /// Perform a atomicrmw expansion using a target-specific way. This is |
2119 | /// expected to be called when masked atomicrmw and bit test atomicrmw don't |
2120 | /// work, and the target supports another way to lower atomicrmw. |
2121 | virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const { |
2122 | llvm_unreachable( |
2123 | "Generic atomicrmw expansion unimplemented on this target" ); |
2124 | } |
2125 | |
2126 | /// Perform a bit test atomicrmw using a target-specific intrinsic. This |
2127 | /// represents the combined bit test intrinsic which will be lowered at a late |
2128 | /// stage by the backend. |
2129 | virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2130 | llvm_unreachable( |
2131 | "Bit test atomicrmw expansion unimplemented on this target" ); |
2132 | } |
2133 | |
2134 | /// Perform a atomicrmw which the result is only used by comparison, using a |
2135 | /// target-specific intrinsic. This represents the combined atomic and compare |
2136 | /// intrinsic which will be lowered at a late stage by the backend. |
2137 | virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const { |
2138 | llvm_unreachable( |
2139 | "Compare arith atomicrmw expansion unimplemented on this target" ); |
2140 | } |
2141 | |
2142 | /// Perform a masked cmpxchg using a target-specific intrinsic. This |
2143 | /// represents the core LL/SC loop which will be lowered at a late stage by |
2144 | /// the backend. The target-specific intrinsic returns the loaded value and |
2145 | /// is not responsible for masking and shifting the result. |
2146 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
2147 | IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
2148 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
2149 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target" ); |
2150 | } |
2151 | |
2152 | //===--------------------------------------------------------------------===// |
2153 | /// \name KCFI check lowering. |
2154 | /// @{ |
2155 | |
2156 | virtual MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, |
2157 | MachineBasicBlock::instr_iterator &MBBI, |
2158 | const TargetInstrInfo *TII) const { |
2159 | llvm_unreachable("KCFI is not supported on this target" ); |
2160 | } |
2161 | |
2162 | /// @} |
2163 | |
2164 | /// Inserts in the IR a target-specific intrinsic specifying a fence. |
2165 | /// It is called by AtomicExpandPass before expanding an |
2166 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
2167 | /// if shouldInsertFencesForAtomic returns true. |
2168 | /// |
2169 | /// Inst is the original atomic instruction, prior to other expansions that |
2170 | /// may be performed. |
2171 | /// |
2172 | /// This function should either return a nullptr, or a pointer to an IR-level |
2173 | /// Instruction*. Even complex fence sequences can be represented by a |
2174 | /// single Instruction* through an intrinsic to be lowered later. |
2175 | /// |
2176 | /// The default implementation emits an IR fence before any release (or |
2177 | /// stronger) operation that stores, and after any acquire (or stronger) |
2178 | /// operation. This is generally a correct implementation, but backends may |
2179 | /// override if they wish to use alternative schemes (e.g. the PowerPC |
2180 | /// standard ABI uses a fence before a seq_cst load instead of after a |
2181 | /// seq_cst store). |
2182 | /// @{ |
2183 | virtual Instruction *emitLeadingFence(IRBuilderBase &Builder, |
2184 | Instruction *Inst, |
2185 | AtomicOrdering Ord) const; |
2186 | |
2187 | virtual Instruction *emitTrailingFence(IRBuilderBase &Builder, |
2188 | Instruction *Inst, |
2189 | AtomicOrdering Ord) const; |
2190 | /// @} |
2191 | |
2192 | // Emits code that executes when the comparison result in the ll/sc |
2193 | // expansion of a cmpxchg instruction is such that the store-conditional will |
2194 | // not execute. This makes it possible to balance out the load-linked with |
2195 | // a dedicated instruction, if desired. |
2196 | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would |
2197 | // be unnecessarily held, except if clrex, inserted by this hook, is executed. |
2198 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const {} |
2199 | |
2200 | /// Returns true if arguments should be sign-extended in lib calls. |
2201 | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
2202 | return IsSigned; |
2203 | } |
2204 | |
2205 | /// Returns true if arguments should be extended in lib calls. |
2206 | virtual bool shouldExtendTypeInLibCall(EVT Type) const { |
2207 | return true; |
2208 | } |
2209 | |
2210 | /// Returns how the given (atomic) load should be expanded by the |
2211 | /// IR-level AtomicExpand pass. |
2212 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
2213 | return AtomicExpansionKind::None; |
2214 | } |
2215 | |
2216 | /// Returns how the given (atomic) load should be cast by the IR-level |
2217 | /// AtomicExpand pass. |
2218 | virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const { |
2219 | if (LI->getType()->isFloatingPointTy()) |
2220 | return AtomicExpansionKind::CastToInteger; |
2221 | return AtomicExpansionKind::None; |
2222 | } |
2223 | |
2224 | /// Returns how the given (atomic) store should be expanded by the IR-level |
2225 | /// AtomicExpand pass into. For instance AtomicExpansionKind::Expand will try |
2226 | /// to use an atomicrmw xchg. |
2227 | virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
2228 | return AtomicExpansionKind::None; |
2229 | } |
2230 | |
2231 | /// Returns how the given (atomic) store should be cast by the IR-level |
2232 | /// AtomicExpand pass into. For instance AtomicExpansionKind::CastToInteger |
2233 | /// will try to cast the operands to integer values. |
2234 | virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const { |
2235 | if (SI->getValueOperand()->getType()->isFloatingPointTy()) |
2236 | return AtomicExpansionKind::CastToInteger; |
2237 | return AtomicExpansionKind::None; |
2238 | } |
2239 | |
2240 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level |
2241 | /// AtomicExpand pass. |
2242 | virtual AtomicExpansionKind |
2243 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
2244 | return AtomicExpansionKind::None; |
2245 | } |
2246 | |
2247 | /// Returns how the IR-level AtomicExpand pass should expand the given |
2248 | /// AtomicRMW, if at all. Default is to never expand. |
2249 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
2250 | return RMW->isFloatingPointOperation() ? |
2251 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; |
2252 | } |
2253 | |
2254 | /// Returns how the given atomic atomicrmw should be cast by the IR-level |
2255 | /// AtomicExpand pass. |
2256 | virtual AtomicExpansionKind |
2257 | shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const { |
2258 | if (RMWI->getOperation() == AtomicRMWInst::Xchg && |
2259 | (RMWI->getValOperand()->getType()->isFloatingPointTy() || |
2260 | RMWI->getValOperand()->getType()->isPointerTy())) |
2261 | return AtomicExpansionKind::CastToInteger; |
2262 | |
2263 | return AtomicExpansionKind::None; |
2264 | } |
2265 | |
2266 | /// On some platforms, an AtomicRMW that never actually modifies the value |
2267 | /// (such as fetch_add of 0) can be turned into a fence followed by an |
2268 | /// atomic load. This may sound useless, but it makes it possible for the |
2269 | /// processor to keep the cacheline shared, dramatically improving |
2270 | /// performance. And such idempotent RMWs are useful for implementing some |
2271 | /// kinds of locks, see for example (justification + benchmarks): |
2272 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
2273 | /// This method tries doing that transformation, returning the atomic load if |
2274 | /// it succeeds, and nullptr otherwise. |
2275 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo |
2276 | /// another round of expansion. |
2277 | virtual LoadInst * |
2278 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
2279 | return nullptr; |
2280 | } |
2281 | |
2282 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
2283 | /// SIGN_EXTEND, or ANY_EXTEND). |
2284 | virtual ISD::NodeType getExtendForAtomicOps() const { |
2285 | return ISD::ZERO_EXTEND; |
2286 | } |
2287 | |
2288 | /// Returns how the platform's atomic compare and swap expects its comparison |
2289 | /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is |
2290 | /// separate from getExtendForAtomicOps, which is concerned with the |
2291 | /// sign-extension of the instruction's output, whereas here we are concerned |
2292 | /// with the sign-extension of the input. For targets with compare-and-swap |
2293 | /// instructions (or sub-word comparisons in their LL/SC loop expansions), |
2294 | /// the input can be ANY_EXTEND, but the output will still have a specific |
2295 | /// extension. |
2296 | virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { |
2297 | return ISD::ANY_EXTEND; |
2298 | } |
2299 | |
2300 | /// @} |
2301 | |
2302 | /// Returns true if we should normalize |
2303 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
2304 | /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely |
2305 | /// that it saves us from materializing N0 and N1 in an integer register. |
2306 | /// Targets that are able to perform and/or on flags should return false here. |
2307 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
2308 | EVT VT) const { |
2309 | // If a target has multiple condition registers, then it likely has logical |
2310 | // operations on those registers. |
2311 | if (hasMultipleConditionRegisters()) |
2312 | return false; |
2313 | // Only do the transform if the value won't be split into multiple |
2314 | // registers. |
2315 | LegalizeTypeAction Action = getTypeAction(Context, VT); |
2316 | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
2317 | Action != TypeSplitVector; |
2318 | } |
2319 | |
2320 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
2321 | |
2322 | /// Return true if a select of constants (select Cond, C1, C2) should be |
2323 | /// transformed into simple math ops with the condition value. For example: |
2324 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
2325 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
2326 | return false; |
2327 | } |
2328 | |
2329 | /// Return true if it is profitable to transform an integer |
2330 | /// multiplication-by-constant into simpler operations like shifts and adds. |
2331 | /// This may be true if the target does not directly support the |
2332 | /// multiplication operation for the specified type or the sequence of simpler |
2333 | /// ops is faster than the multiply. |
2334 | virtual bool decomposeMulByConstant(LLVMContext &Context, |
2335 | EVT VT, SDValue C) const { |
2336 | return false; |
2337 | } |
2338 | |
2339 | /// Return true if it may be profitable to transform |
2340 | /// (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). |
2341 | /// This may not be true if c1 and c2 can be represented as immediates but |
2342 | /// c1*c2 cannot, for example. |
2343 | /// The target should check if c1, c2 and c1*c2 can be represented as |
2344 | /// immediates, or have to be materialized into registers. If it is not sure |
2345 | /// about some cases, a default true can be returned to let the DAGCombiner |
2346 | /// decide. |
2347 | /// AddNode is (add x, c1), and ConstNode is c2. |
2348 | virtual bool isMulAddWithConstProfitable(SDValue AddNode, |
2349 | SDValue ConstNode) const { |
2350 | return true; |
2351 | } |
2352 | |
2353 | /// Return true if it is more correct/profitable to use strict FP_TO_INT |
2354 | /// conversion operations - canonicalizing the FP source value instead of |
2355 | /// converting all cases and then selecting based on value. |
2356 | /// This may be true if the target throws exceptions for out of bounds |
2357 | /// conversions or has fast FP CMOV. |
2358 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
2359 | bool IsSigned) const { |
2360 | return false; |
2361 | } |
2362 | |
2363 | /// Return true if it is beneficial to expand an @llvm.powi.* intrinsic. |
2364 | /// If not optimizing for size, expanding @llvm.powi.* intrinsics is always |
2365 | /// considered beneficial. |
2366 | /// If optimizing for size, expansion is only considered beneficial for upto |
2367 | /// 5 multiplies and a divide (if the exponent is negative). |
2368 | bool isBeneficialToExpandPowI(int64_t Exponent, bool OptForSize) const { |
2369 | if (Exponent < 0) |
2370 | Exponent = -Exponent; |
2371 | uint64_t E = static_cast<uint64_t>(Exponent); |
2372 | return !OptForSize || (llvm::popcount(Value: E) + Log2_64(Value: E) < 7); |
2373 | } |
2374 | |
2375 | //===--------------------------------------------------------------------===// |
2376 | // TargetLowering Configuration Methods - These methods should be invoked by |
2377 | // the derived class constructor to configure this object for the target. |
2378 | // |
2379 | protected: |
2380 | /// Specify how the target extends the result of integer and floating point |
2381 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2382 | void setBooleanContents(BooleanContent Ty) { |
2383 | BooleanContents = Ty; |
2384 | BooleanFloatContents = Ty; |
2385 | } |
2386 | |
2387 | /// Specify how the target extends the result of integer and floating point |
2388 | /// boolean values from i1 to a wider type. See getBooleanContents. |
2389 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
2390 | BooleanContents = IntTy; |
2391 | BooleanFloatContents = FloatTy; |
2392 | } |
2393 | |
2394 | /// Specify how the target extends the result of a vector boolean value from a |
2395 | /// vector of i1 to a wider type. See getBooleanContents. |
2396 | void setBooleanVectorContents(BooleanContent Ty) { |
2397 | BooleanVectorContents = Ty; |
2398 | } |
2399 | |
2400 | /// Specify the target scheduling preference. |
2401 | void setSchedulingPreference(Sched::Preference Pref) { |
2402 | SchedPreferenceInfo = Pref; |
2403 | } |
2404 | |
2405 | /// Indicate the minimum number of blocks to generate jump tables. |
2406 | void setMinimumJumpTableEntries(unsigned Val); |
2407 | |
2408 | /// Indicate the maximum number of entries in jump tables. |
2409 | /// Set to zero to generate unlimited jump tables. |
2410 | void setMaximumJumpTableSize(unsigned); |
2411 | |
2412 | /// If set to a physical register, this specifies the register that |
2413 | /// llvm.savestack/llvm.restorestack should save and restore. |
2414 | void setStackPointerRegisterToSaveRestore(Register R) { |
2415 | StackPointerRegisterToSaveRestore = R; |
2416 | } |
2417 | |
2418 | /// Tells the code generator that the target has multiple (allocatable) |
2419 | /// condition registers that can be used to store the results of comparisons |
2420 | /// for use by selects and conditional branches. With multiple condition |
2421 | /// registers, the code generator will not aggressively sink comparisons into |
2422 | /// the blocks of their users. |
2423 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
2424 | HasMultipleConditionRegisters = hasManyRegs; |
2425 | } |
2426 | |
2427 | /// Tells the code generator that the target has BitExtract instructions. |
2428 | /// The code generator will aggressively sink "shift"s into the blocks of |
2429 | /// their users if the users will generate "and" instructions which can be |
2430 | /// combined with "shift" to BitExtract instructions. |
2431 | void (bool = true) { |
2432 | HasExtractBitsInsn = hasExtractInsn; |
2433 | } |
2434 | |
2435 | /// Tells the code generator not to expand logic operations on comparison |
2436 | /// predicates into separate sequences that increase the amount of flow |
2437 | /// control. |
2438 | void setJumpIsExpensive(bool isExpensive = true); |
2439 | |
2440 | /// Tells the code generator which bitwidths to bypass. |
2441 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
2442 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
2443 | } |
2444 | |
2445 | /// Add the specified register class as an available regclass for the |
2446 | /// specified value type. This indicates the selector can handle values of |
2447 | /// that class natively. |
2448 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
2449 | assert((unsigned)VT.SimpleTy < std::size(RegClassForVT)); |
2450 | RegClassForVT[VT.SimpleTy] = RC; |
2451 | } |
2452 | |
2453 | /// Return the largest legal super-reg register class of the register class |
2454 | /// for the specified type and its associated "cost". |
2455 | virtual std::pair<const TargetRegisterClass *, uint8_t> |
2456 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
2457 | |
2458 | /// Once all of the register classes are added, this allows us to compute |
2459 | /// derived properties we expose. |
2460 | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
2461 | |
2462 | /// Indicate that the specified operation does not work with the specified |
2463 | /// type and indicate what to do about it. Note that VT may refer to either |
2464 | /// the type of a result or that of an operand of Op. |
2465 | void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { |
2466 | assert(Op < std::size(OpActions[0]) && "Table isn't big enough!" ); |
2467 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
2468 | } |
2469 | void setOperationAction(ArrayRef<unsigned> Ops, MVT VT, |
2470 | LegalizeAction Action) { |
2471 | for (auto Op : Ops) |
2472 | setOperationAction(Op, VT, Action); |
2473 | } |
2474 | void setOperationAction(ArrayRef<unsigned> Ops, ArrayRef<MVT> VTs, |
2475 | LegalizeAction Action) { |
2476 | for (auto VT : VTs) |
2477 | setOperationAction(Ops, VT, Action); |
2478 | } |
2479 | |
2480 | /// Indicate that the specified load with extension does not work with the |
2481 | /// specified type and indicate what to do about it. |
2482 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
2483 | LegalizeAction Action) { |
2484 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
2485 | MemVT.isValid() && "Table isn't big enough!" ); |
2486 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array" ); |
2487 | unsigned Shift = 4 * ExtType; |
2488 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
2489 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
2490 | } |
2491 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT, |
2492 | LegalizeAction Action) { |
2493 | for (auto ExtType : ExtTypes) |
2494 | setLoadExtAction(ExtType, ValVT, MemVT, Action); |
2495 | } |
2496 | void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, |
2497 | ArrayRef<MVT> MemVTs, LegalizeAction Action) { |
2498 | for (auto MemVT : MemVTs) |
2499 | setLoadExtAction(ExtTypes, ValVT, MemVT, Action); |
2500 | } |
2501 | |
2502 | /// Indicate that the specified truncating store does not work with the |
2503 | /// specified type and indicate what to do about it. |
2504 | void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { |
2505 | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!" ); |
2506 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
2507 | } |
2508 | |
2509 | /// Indicate that the specified indexed load does or does not work with the |
2510 | /// specified type and indicate what to do abort it. |
2511 | /// |
2512 | /// NOTE: All indexed mode loads are initialized to Expand in |
2513 | /// TargetLowering.cpp |
2514 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2515 | LegalizeAction Action) { |
2516 | for (auto IdxMode : IdxModes) |
2517 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_Load, Action); |
2518 | } |
2519 | |
2520 | void setIndexedLoadAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2521 | LegalizeAction Action) { |
2522 | for (auto VT : VTs) |
2523 | setIndexedLoadAction(IdxModes, VT, Action); |
2524 | } |
2525 | |
2526 | /// Indicate that the specified indexed store does or does not work with the |
2527 | /// specified type and indicate what to do about it. |
2528 | /// |
2529 | /// NOTE: All indexed mode stores are initialized to Expand in |
2530 | /// TargetLowering.cpp |
2531 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, MVT VT, |
2532 | LegalizeAction Action) { |
2533 | for (auto IdxMode : IdxModes) |
2534 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_Store, Action); |
2535 | } |
2536 | |
2537 | void setIndexedStoreAction(ArrayRef<unsigned> IdxModes, ArrayRef<MVT> VTs, |
2538 | LegalizeAction Action) { |
2539 | for (auto VT : VTs) |
2540 | setIndexedStoreAction(IdxModes, VT, Action); |
2541 | } |
2542 | |
2543 | /// Indicate that the specified indexed masked load does or does not work with |
2544 | /// the specified type and indicate what to do about it. |
2545 | /// |
2546 | /// NOTE: All indexed mode masked loads are initialized to Expand in |
2547 | /// TargetLowering.cpp |
2548 | void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, |
2549 | LegalizeAction Action) { |
2550 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedLoad, Action); |
2551 | } |
2552 | |
2553 | /// Indicate that the specified indexed masked store does or does not work |
2554 | /// with the specified type and indicate what to do about it. |
2555 | /// |
2556 | /// NOTE: All indexed mode masked stores are initialized to Expand in |
2557 | /// TargetLowering.cpp |
2558 | void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, |
2559 | LegalizeAction Action) { |
2560 | setIndexedModeAction(IdxMode, VT, Shift: IMAB_MaskedStore, Action); |
2561 | } |
2562 | |
2563 | /// Indicate that the specified condition code is or isn't supported on the |
2564 | /// target and indicate what to do about it. |
2565 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, MVT VT, |
2566 | LegalizeAction Action) { |
2567 | for (auto CC : CCs) { |
2568 | assert(VT.isValid() && (unsigned)CC < std::size(CondCodeActions) && |
2569 | "Table isn't big enough!" ); |
2570 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array" ); |
2571 | /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the |
2572 | /// 32-bit value and the upper 29 bits index into the second dimension of |
2573 | /// the array to select what 32-bit value to use. |
2574 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
2575 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
2576 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
2577 | } |
2578 | } |
2579 | void setCondCodeAction(ArrayRef<ISD::CondCode> CCs, ArrayRef<MVT> VTs, |
2580 | LegalizeAction Action) { |
2581 | for (auto VT : VTs) |
2582 | setCondCodeAction(CCs, VT, Action); |
2583 | } |
2584 | |
2585 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
2586 | /// to trying a larger integer/fp until it can find one that works. If that |
2587 | /// default is insufficient, this method can be used by the target to override |
2588 | /// the default. |
2589 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2590 | PromoteToType[std::make_pair(x&: Opc, y&: OrigVT.SimpleTy)] = DestVT.SimpleTy; |
2591 | } |
2592 | |
2593 | /// Convenience method to set an operation to Promote and specify the type |
2594 | /// in a single call. |
2595 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
2596 | setOperationAction(Op: Opc, VT: OrigVT, Action: Promote); |
2597 | AddPromotedToType(Opc, OrigVT, DestVT); |
2598 | } |
2599 | void setOperationPromotedToType(ArrayRef<unsigned> Ops, MVT OrigVT, |
2600 | MVT DestVT) { |
2601 | for (auto Op : Ops) { |
2602 | setOperationAction(Op, VT: OrigVT, Action: Promote); |
2603 | AddPromotedToType(Opc: Op, OrigVT, DestVT); |
2604 | } |
2605 | } |
2606 | |
2607 | /// Targets should invoke this method for each target independent node that |
2608 | /// they want to provide a custom DAG combiner for by implementing the |
2609 | /// PerformDAGCombine virtual method. |
2610 | void setTargetDAGCombine(ArrayRef<ISD::NodeType> NTs) { |
2611 | for (auto NT : NTs) { |
2612 | assert(unsigned(NT >> 3) < std::size(TargetDAGCombineArray)); |
2613 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT & 7); |
2614 | } |
2615 | } |
2616 | |
/// Set the target's minimum function alignment.
void setMinFunctionAlignment(Align Alignment) {
  MinFunctionAlignment = Alignment;
}

/// Set the target's preferred function alignment. This should be set if
/// there is a performance benefit to higher-than-minimum alignment
void setPrefFunctionAlignment(Align Alignment) {
  PrefFunctionAlignment = Alignment;
}

/// Set the target's preferred loop alignment. Default alignment is one, it
/// means the target does not care about loop alignment. The target may also
/// override getPrefLoopAlignment to provide per-loop values.
void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
/// Set the maximum number of bytes allowed to be emitted when padding for
/// alignment.
void setMaxBytesForAlignment(unsigned MaxBytes) {
  MaxBytesForAlignment = MaxBytes;
}

/// Set the minimum stack alignment of an argument.
void setMinStackArgumentAlignment(Align Alignment) {
  MinStackArgumentAlignment = Alignment;
}
2640 | |
2641 | /// Set the maximum atomic operation size supported by the |
2642 | /// backend. Atomic operations greater than this size (as well as |
2643 | /// ones that are not naturally aligned), will be expanded by |
2644 | /// AtomicExpandPass into an __atomic_* library call. |
2645 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
2646 | MaxAtomicSizeInBitsSupported = SizeInBits; |
2647 | } |
2648 | |
2649 | /// Set the size in bits of the maximum div/rem the backend supports. |
2650 | /// Larger operations will be expanded by ExpandLargeDivRem. |
2651 | void setMaxDivRemBitWidthSupported(unsigned SizeInBits) { |
2652 | MaxDivRemBitWidthSupported = SizeInBits; |
2653 | } |
2654 | |
2655 | /// Set the size in bits of the maximum fp convert the backend supports. |
2656 | /// Larger operations will be expanded by ExpandLargeFPConvert. |
2657 | void setMaxLargeFPConvertBitWidthSupported(unsigned SizeInBits) { |
2658 | MaxLargeFPConvertBitWidthSupported = SizeInBits; |
2659 | } |
2660 | |
2661 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. |
2662 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
2663 | MinCmpXchgSizeInBits = SizeInBits; |
2664 | } |
2665 | |
2666 | /// Sets whether unaligned atomic operations are supported. |
2667 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { |
2668 | SupportsUnalignedAtomics = UnalignedSupported; |
2669 | } |
2670 | |
2671 | public: |
2672 | //===--------------------------------------------------------------------===// |
2673 | // Addressing mode description hooks (used by LSR etc). |
2674 | // |
2675 | |
2676 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
2677 | /// instructions reading the address. This allows as much computation as |
2678 | /// possible to be done in the address mode for that operand. This hook lets |
2679 | /// targets also pass back when this should be done on intrinsics which |
2680 | /// load/store. |
2681 | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, |
2682 | SmallVectorImpl<Value*> &/*Ops*/, |
2683 | Type *&/*AccessTy*/) const { |
2684 | return false; |
2685 | } |
2686 | |
  /// This represents an addressing mode of:
  ///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  /// If BaseGV is null, there is no BaseGV.
  /// If BaseOffs is zero, there is no base offset.
  /// If HasBaseReg is false, there is no base register.
  /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
  /// no scale.
  struct AddrMode {
    GlobalValue *BaseGV = nullptr; // Global symbol base, or null.
    int64_t BaseOffs = 0;          // Constant displacement.
    bool HasBaseReg = false;       // True if a base register participates.
    int64_t Scale = 0;             // Scale factor for ScaleReg (0 = none).
    AddrMode() = default;
  };

  /// Return true if the addressing mode represented by AM is legal for this
  /// target, for a load/store of the specified type.
  ///
  /// The type may be VoidTy, in which case only return true if the addressing
  /// mode is legal for a load/store of any legal type. TODO: Handle
  /// pre/postinc as well.
  ///
  /// If the address space cannot be determined, it will be -1.
  ///
  /// TODO: Remove default argument
  virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                                     Type *Ty, unsigned AddrSpace,
                                     Instruction *I = nullptr) const;
2715 | |
  /// Return the preferred common base offset.
  /// The default (0) expresses no preference.
  virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset,
                                                 int64_t MaxOffset) const {
    return 0;
  }

  /// Return true if the specified immediate is legal icmp immediate, that is
  /// the target has icmp instructions which can compare a register against the
  /// immediate without having to materialize the immediate into a register.
  virtual bool isLegalICmpImmediate(int64_t) const {
    return true;
  }

  /// Return true if the specified immediate is legal add immediate, that is the
  /// target has add instructions which can add a register with the immediate
  /// without having to materialize the immediate into a register.
  virtual bool isLegalAddImmediate(int64_t) const {
    return true;
  }

  /// Return true if the specified immediate is legal for the value input of a
  /// store instruction.
  virtual bool isLegalStoreImmediate(int64_t Value) const {
    // Default implementation assumes that at least 0 works since it is likely
    // that a zero register exists or a zero immediate is allowed.
    return Value == 0;
  }
2743 | |
  /// Return true if it's significantly cheaper to shift a vector by a uniform
  /// scalar than by an amount which will vary across each lane. On x86 before
  /// AVX2 for example, there is a "psllw" instruction for the former case, but
  /// no simple instruction for a general "a << b" operation on vectors.
  /// This should also apply to lowering for vector funnel shifts (rotates).
  virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
    return false;
  }

  /// Given a shuffle vector SVI representing a vector splat, return a new
  /// scalar type of size equal to SVI's scalar type if the new type is more
  /// profitable. Returns nullptr otherwise. For example under MVE float splats
  /// are converted to integer to prevent the need to move from SPR to GPR
  /// registers.
  virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
    return nullptr;
  }

  /// Given a set in interconnected phis of type 'From' that are loaded/stored
  /// or bitcast to type 'To', return true if the set should be converted to
  /// 'To'.
  /// Default: allow int<->fp conversions of matching kind only.
  virtual bool shouldConvertPhiType(Type *From, Type *To) const {
    return (From->isIntegerTy() || From->isFloatingPointTy()) &&
           (To->isIntegerTy() || To->isFloatingPointTy());
  }
2769 | |
  /// Returns true if the opcode is a commutative binary operation.
  /// Targets may override to add target-specific commutative opcodes.
  virtual bool isCommutativeBinOp(unsigned Opcode) const {
    // FIXME: This should get its info from the td file.
    switch (Opcode) {
    case ISD::ADD:
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
    case ISD::MUL:
    case ISD::MULHU:
    case ISD::MULHS:
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:
    case ISD::FADD:
    case ISD::FMUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SADDO:
    case ISD::UADDO:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::SADDSAT:
    case ISD::UADDSAT:
    case ISD::FMINNUM:
    case ISD::FMAXNUM:
    case ISD::FMINNUM_IEEE:
    case ISD::FMAXNUM_IEEE:
    case ISD::FMINIMUM:
    case ISD::FMAXIMUM:
    case ISD::AVGFLOORS:
    case ISD::AVGFLOORU:
    case ISD::AVGCEILS:
    case ISD::AVGCEILU:
    case ISD::ABDS:
    case ISD::ABDU:
      return true;
    default: return false;
    }
  }
2811 | |
  /// Return true if the node is a math/logic binary operator.
  virtual bool isBinOp(unsigned Opcode) const {
    // A commutative binop must be a binop.
    if (isCommutativeBinOp(Opcode))
      return true;
    // These are non-commutative binops.
    switch (Opcode) {
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRL:
    case ISD::SRA:
    case ISD::ROTL:
    case ISD::ROTR:
    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM:
    case ISD::SSUBSAT:
    case ISD::USUBSAT:
    case ISD::FSUB:
    case ISD::FDIV:
    case ISD::FREM:
      return true;
    default:
      return false;
    }
  }
2839 | |
  /// Return true if it's free to truncate a value of type FromTy to type
  /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
  /// by referencing its sub-register AX.
  /// Targets must return false when FromTy <= ToTy.
  virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
    return false;
  }

  /// Return true if a truncation from FromTy to ToTy is permitted when deciding
  /// whether a call is in tail position. Typically this means that both results
  /// would be assigned to the same register or stack slot, but it could mean
  /// the target performs adequate checks of its own before proceeding with the
  /// tail call.  Targets must return false when FromTy <= ToTy.
  virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
    return false;
  }

  /// EVT variant of isTruncateFree. Conservative default: not free.
  virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { return false; }
2858 | virtual bool isTruncateFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
2859 | LLVMContext &Ctx) const { |
2860 | return isTruncateFree(FromVT: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx), |
2861 | ToVT: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx)); |
2862 | } |
2863 | |
2864 | /// Return true if truncating the specific node Val to type VT2 is free. |
2865 | virtual bool isTruncateFree(SDValue Val, EVT VT2) const { |
2866 | // Fallback to type matching. |
2867 | return isTruncateFree(FromVT: Val.getValueType(), ToVT: VT2); |
2868 | } |
2869 | |
  /// Return true if hoisting instruction \p I is profitable; the default
  /// always permits hoisting. Targets override to veto specific cases.
  virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2871 | |
2872 | /// Return true if the extension represented by \p I is free. |
2873 | /// Unlikely the is[Z|FP]ExtFree family which is based on types, |
2874 | /// this method can use the context provided by \p I to decide |
2875 | /// whether or not \p I is free. |
2876 | /// This method extends the behavior of the is[Z|FP]ExtFree family. |
2877 | /// In other words, if is[Z|FP]Free returns true, then this method |
2878 | /// returns true as well. The converse is not true. |
2879 | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
2880 | /// \pre \p I must be a sign, zero, or fp extension. |
2881 | bool isExtFree(const Instruction *I) const { |
2882 | switch (I->getOpcode()) { |
2883 | case Instruction::FPExt: |
2884 | if (isFPExtFree(DestVT: EVT::getEVT(Ty: I->getType()), |
2885 | SrcVT: EVT::getEVT(Ty: I->getOperand(i: 0)->getType()))) |
2886 | return true; |
2887 | break; |
2888 | case Instruction::ZExt: |
2889 | if (isZExtFree(FromTy: I->getOperand(i: 0)->getType(), ToTy: I->getType())) |
2890 | return true; |
2891 | break; |
2892 | case Instruction::SExt: |
2893 | break; |
2894 | default: |
2895 | llvm_unreachable("Instruction is not an extension" ); |
2896 | } |
2897 | return isExtFreeImpl(I); |
2898 | } |
2899 | |
2900 | /// Return true if \p Load and \p Ext can form an ExtLoad. |
2901 | /// For example, in AArch64 |
2902 | /// %L = load i8, i8* %ptr |
2903 | /// %E = zext i8 %L to i32 |
2904 | /// can be lowered into one load instruction |
2905 | /// ldrb w0, [x0] |
2906 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
2907 | const DataLayout &DL) const { |
2908 | EVT VT = getValueType(DL, Ty: Ext->getType()); |
2909 | EVT LoadVT = getValueType(DL, Ty: Load->getType()); |
2910 | |
2911 | // If the load has other users and the truncate is not free, the ext |
2912 | // probably isn't free. |
2913 | if (!Load->hasOneUse() && (isTypeLegal(VT: LoadVT) || !isTypeLegal(VT)) && |
2914 | !isTruncateFree(FromTy: Ext->getType(), ToTy: Load->getType())) |
2915 | return false; |
2916 | |
2917 | // Check whether the target supports casts folded into loads. |
2918 | unsigned LType; |
2919 | if (isa<ZExtInst>(Val: Ext)) |
2920 | LType = ISD::ZEXTLOAD; |
2921 | else { |
2922 | assert(isa<SExtInst>(Ext) && "Unexpected ext type!" ); |
2923 | LType = ISD::SEXTLOAD; |
2924 | } |
2925 | |
2926 | return isLoadExtLegal(ExtType: LType, ValVT: VT, MemVT: LoadVT); |
2927 | } |
2928 | |
  /// Return true if any actual instruction that defines a value of type FromTy
  /// implicitly zero-extends the value to ToTy in the result register.
  ///
  /// The function should return true when it is likely that the truncate can
  /// be freely folded with an instruction defining a value of FromTy. If
  /// the defining instruction is unknown (because you're looking at a
  /// function argument, PHI, etc.) then the target may require an
  /// explicit truncate, which is not necessarily free, but this function
  /// does not deal with those cases.
  /// Targets must return false when FromTy >= ToTy.
  virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
    return false;
  }

  /// EVT variant of isZExtFree. Conservative default: not free.
  virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { return false; }
2944 | virtual bool isZExtFree(LLT FromTy, LLT ToTy, const DataLayout &DL, |
2945 | LLVMContext &Ctx) const { |
2946 | return isZExtFree(FromTy: getApproximateEVTForLLT(Ty: FromTy, DL, Ctx), |
2947 | ToTy: getApproximateEVTForLLT(Ty: ToTy, DL, Ctx)); |
2948 | } |
2949 | |
2950 | /// Return true if zero-extending the specific node Val to type VT2 is free |
2951 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
2952 | /// because it's folded such as X86 zero-extending loads). |
2953 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
2954 | return isZExtFree(FromTy: Val.getValueType(), ToTy: VT2); |
2955 | } |
2956 | |
  /// Return true if sign-extension from FromTy to ToTy is cheaper than
  /// zero-extension.
  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
    return false;
  }

  /// Return true if this constant should be sign extended when promoting to
  /// a larger type.
  virtual bool signExtendConstant(const ConstantInt *C) const { return false; }

  /// Return true if sinking I's operands to the same basic block as I is
  /// profitable, e.g. because the operands can be folded into a target
  /// instruction during instruction selection. After calling the function
  /// \p Ops contains the Uses to sink ordered by dominance (dominating users
  /// come first).
  /// The default leaves \p Ops untouched and declines to sink.
  virtual bool shouldSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const {
    return false;
  }

  /// Try to optimize extending or truncating conversion instructions (like
  /// zext, trunc, fptoui, uitofp) for the target.
  /// Returns true if the instruction was modified/optimized.
  virtual bool
  optimizeExtendOrTruncateConversion(Instruction *I, Loop *L,
                                     const TargetTransformInfo &TTI) const {
    return false;
  }
2984 | |
  /// Return true if the target supplies and combines to a paired load
  /// two loaded values of type LoadedType next to each other in memory.
  /// RequiredAlignment gives the minimal alignment constraints that must be met
  /// to be able to select this paired load.
  ///
  /// This information is *not* used to generate actual paired loads, but it is
  /// used to generate a sequence of loads that is easier to combine into a
  /// paired load.
  /// For instance, something like this:
  ///   a = load i64* addr
  ///   b = trunc i64 a to i32
  ///   c = lshr i64 a, 32
  ///   d = trunc i64 c to i32
  /// will be optimized into:
  ///   b = load i32* addr1
  ///   d = load i32* addr2
  /// Where addr1 = addr2 +/- sizeof(i32).
  ///
  /// In other words, unless the target performs a post-isel load combining,
  /// this information should not be provided because it will generate more
  /// loads.
  virtual bool hasPairedLoad(EVT /*LoadedType*/,
                             Align & /*RequiredAlignment*/) const {
    return false;
  }

  /// Return true if the target has a vector blend instruction.
  virtual bool hasVectorBlend() const { return false; }

  /// Get the maximum supported factor for interleaved memory accesses.
  /// Default to be the minimum interleave factor: 2.
  virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
3017 | |
  /// Lower an interleaved load to target specific intrinsics. Return
  /// true on success.
  ///
  /// \p LI is the vector load instruction.
  /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
  /// \p Indices is the corresponding indices for each shufflevector.
  /// \p Factor is the interleave factor.
  /// Default: no target support.
  virtual bool lowerInterleavedLoad(LoadInst *LI,
                                    ArrayRef<ShuffleVectorInst *> Shuffles,
                                    ArrayRef<unsigned> Indices,
                                    unsigned Factor) const {
    return false;
  }

  /// Lower an interleaved store to target specific intrinsics. Return
  /// true on success.
  ///
  /// \p SI is the vector store instruction.
  /// \p SVI is the shufflevector to RE-interleave the stored vector.
  /// \p Factor is the interleave factor.
  virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                     unsigned Factor) const {
    return false;
  }

  /// Lower a deinterleave intrinsic to a target specific load intrinsic.
  /// Return true on success. Currently only supports
  /// llvm.experimental.vector.deinterleave2
  ///
  /// \p DI is the deinterleave intrinsic.
  /// \p LI is the accompanying load instruction
  virtual bool lowerDeinterleaveIntrinsicToLoad(IntrinsicInst *DI,
                                                LoadInst *LI) const {
    return false;
  }

  /// Lower an interleave intrinsic to a target specific store intrinsic.
  /// Return true on success. Currently only supports
  /// llvm.experimental.vector.interleave2
  ///
  /// \p II is the interleave intrinsic.
  /// \p SI is the accompanying store instruction
  virtual bool lowerInterleaveIntrinsicToStore(IntrinsicInst *II,
                                               StoreInst *SI) const {
    return false;
  }
3064 | |
  /// Return true if an fpext operation is free (for instance, because
  /// single-precision floating-point numbers are implicitly extended to
  /// double-precision).
  /// Both types must be floating point (asserted below).
  virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
    assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
           "invalid fpext types" );
    return false;
  }

  /// Return true if an fpext operation input to an \p Opcode operation is free
  /// (for instance, because half-precision floating-point numbers are
  /// implicitly extended to float-precision) for an FMA instruction.
  /// GlobalISel (MachineInstr/LLT) variant.
  virtual bool isFPExtFoldable(const MachineInstr &MI, unsigned Opcode,
                               LLT DestTy, LLT SrcTy) const {
    return false;
  }

  /// Return true if an fpext operation input to an \p Opcode operation is free
  /// (for instance, because half-precision floating-point numbers are
  /// implicitly extended to float-precision) for an FMA instruction.
  /// SelectionDAG variant; defers to isFPExtFree by default.
  virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
                               EVT DestVT, EVT SrcVT) const {
    assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
           "invalid fpext types" );
    return isFPExtFree(DestVT, SrcVT);
  }
3091 | |
  /// Return true if folding a vector load into ExtVal (a sign, zero, or any
  /// extend node) is profitable.
  virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }

  /// Return true if an fneg operation is free to the point where it is never
  /// worthwhile to replace it with a bitwise operation.
  virtual bool isFNegFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false;
  }

  /// Return true if an fabs operation is free to the point where it is never
  /// worthwhile to replace it with a bitwise operation.
  virtual bool isFAbsFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false;
  }
3109 | |
  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  ///
  /// NOTE: This may be called before legalization on types for which FMAs are
  /// not legal, but should return true if those types will eventually legalize
  /// to types that support FMAs. After legalization, it will only be called on
  /// types that support FMAs (via Legal or Custom actions)
  virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                          EVT) const {
    return false;
  }

  /// Return true if an FMA operation is faster than a pair of fmul and fadd
  /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
  /// returns true, otherwise fmuladd is expanded to fmul + fadd.
  ///
  /// NOTE: This may be called before legalization on types for which FMAs are
  /// not legal, but should return true if those types will eventually legalize
  /// to types that support FMAs. After legalization, it will only be called on
  /// types that support FMAs (via Legal or Custom actions)
  /// GlobalISel (LLT) variant of the hook above.
  virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                          LLT) const {
    return false;
  }

  /// IR version
  virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
    return false;
  }
3140 | |
  /// Returns true if \p MI can be combined with another instruction to
  /// form TargetOpcode::G_FMAD. \p N may be an TargetOpcode::G_FADD,
  /// TargetOpcode::G_FSUB, or an TargetOpcode::G_FMUL which will be
  /// distributed into an fadd/fsub.
  /// Default: legal iff G_FMAD is legal for the matching scalar FP type
  /// (f16/f32/f64); any other scalar width is rejected.
  virtual bool isFMADLegal(const MachineInstr &MI, LLT Ty) const {
    assert((MI.getOpcode() == TargetOpcode::G_FADD ||
            MI.getOpcode() == TargetOpcode::G_FSUB ||
            MI.getOpcode() == TargetOpcode::G_FMUL) &&
           "unexpected node in FMAD forming combine" );
    switch (Ty.getScalarSizeInBits()) {
    case 16:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f16);
    case 32:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f32);
    case 64:
      return isOperationLegal(TargetOpcode::G_FMAD, MVT::f64);
    default:
      break;
    }

    return false;
  }
3163 | |
3164 | /// Returns true if be combined with to form an ISD::FMAD. \p N may be an |
3165 | /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an |
3166 | /// fadd/fsub. |
3167 | virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { |
3168 | assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || |
3169 | N->getOpcode() == ISD::FMUL) && |
3170 | "unexpected node in FMAD forming combine" ); |
3171 | return isOperationLegal(Op: ISD::FMAD, VT: N->getValueType(ResNo: 0)); |
3172 | } |
3173 | |
  // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
  // than FMUL and ADD is delegated to the machine combiner.
  virtual bool generateFMAsInMachineCombiner(EVT VT,
                                             CodeGenOptLevel OptLevel) const {
    return false;
  }

  /// Return true if it's profitable to narrow operations of type SrcVT to
  /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
  /// i32 to i16.
  virtual bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
    return false;
  }

  /// Return true if pulling a binary operation into a select with an identity
  /// constant is profitable. This is the inverse of an IR transform.
  /// Example: X + (Cond ? Y : 0) --> Cond ? (X + Y) : X
  virtual bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                                    EVT VT) const {
    return false;
  }

  /// Return true if it is beneficial to convert a load of a constant to
  /// just the constant itself.
  /// On some targets it might be more efficient to use a combination of
  /// arithmetic instructions to materialize the constant instead of loading it
  /// from a constant pool.
  virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                 Type *Ty) const {
    return false;
  }
3205 | |
3206 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
3207 | /// from this source type with this index. This is needed because |
3208 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
3209 | /// the first element, and only the target knows which lowering is cheap. |
3210 | virtual bool (EVT ResVT, EVT SrcVT, |
3211 | unsigned Index) const { |
3212 | return false; |
3213 | } |
3214 | |
  /// Try to convert an extract element of a vector binary operation into an
  /// extract element followed by a scalar operation.
  /// \p VecOp is the vector binary operation being extracted from.
  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
    return false;
  }
3220 | |
3221 | /// Return true if extraction of a scalar element from the given vector type |
3222 | /// at the given index is cheap. For example, if scalar operations occur on |
3223 | /// the same register file as vector operations, then an extract element may |
3224 | /// be a sub-register rename rather than an actual instruction. |
3225 | virtual bool (EVT VT, unsigned Index) const { |
3226 | return false; |
3227 | } |
3228 | |
3229 | /// Try to convert math with an overflow comparison into the corresponding DAG |
3230 | /// node operation. Targets may want to override this independently of whether |
3231 | /// the operation is legal/custom for the given type because it may obscure |
3232 | /// matching of other patterns. |
3233 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, |
3234 | bool MathUsed) const { |
3235 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
3236 | // The opcode should not make a difference by default? |
3237 | if (Opcode != ISD::UADDO) |
3238 | return false; |
3239 | |
3240 | // Allow the transform as long as we have an integer type that is not |
3241 | // obviously illegal and unsupported and if the math result is used |
3242 | // besides the overflow check. On some targets (e.g. SPARC), it is |
3243 | // not profitable to form on overflow op if the math result has no |
3244 | // concrete users. |
3245 | if (VT.isVector()) |
3246 | return false; |
3247 | return MathUsed && (VT.isSimple() || !isOperationExpand(Op: Opcode, VT)); |
3248 | } |
3249 | |
  // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
  // even if the vector itself has multiple uses.
  virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
    return false;
  }

  // Return true if CodeGenPrepare should consider splitting large offset of a
  // GEP to make the GEP fit into the addressing mode and can be sunk into the
  // same blocks of its users.
  virtual bool shouldConsiderGEPOffsetSplit() const { return false; }

  /// Return true if creating a shift of the type by the given
  /// amount is not profitable.
  virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
    return false;
  }
3266 | |
3267 | // Should we fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) |
3268 | // A) where y has a single bit set? |
3269 | virtual bool shouldFoldSelectWithSingleBitTest(EVT VT, |
3270 | const APInt &AndMask) const { |
3271 | unsigned ShCt = AndMask.getBitWidth() - 1; |
3272 | return !shouldAvoidTransformToShift(VT, Amount: ShCt); |
3273 | } |
3274 | |
  /// Does this target require the clearing of high-order bits in a register
  /// passed to the fp16 to fp conversion library function.
  virtual bool shouldKeepZExtForFP16Conv() const { return false; }

  /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
  /// from min(max(fptoi)) saturation patterns.
  /// Default: form them whenever the saturating op is legal or custom.
  virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
    return isOperationLegalOrCustom(Op, VT);
  }

  /// Does this target support complex deinterleaving
  virtual bool isComplexDeinterleavingSupported() const { return false; }

  /// Does this target support complex deinterleaving with the given operation
  /// and type
  virtual bool isComplexDeinterleavingOperationSupported(
      ComplexDeinterleavingOperation Operation, Type *Ty) const {
    return false;
  }

  /// Create the IR node for the given complex deinterleaving operation.
  /// If one cannot be created using all the given inputs, nullptr should be
  /// returned.
  virtual Value *createComplexDeinterleavingIR(
      IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
      ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
      Value *Accumulator = nullptr) const {
    return nullptr;
  }
3304 | |
3305 | //===--------------------------------------------------------------------===// |
3306 | // Runtime Library hooks |
3307 | // |
3308 | |
3309 | /// Rename the default libcall routine name for the specified libcall. |
3310 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
3311 | LibcallRoutineNames[Call] = Name; |
3312 | } |
3313 | void setLibcallName(ArrayRef<RTLIB::Libcall> Calls, const char *Name) { |
3314 | for (auto Call : Calls) |
3315 | setLibcallName(Call, Name); |
3316 | } |
3317 | |
3318 | /// Get the libcall routine name for the specified libcall. |
3319 | const char *getLibcallName(RTLIB::Libcall Call) const { |
3320 | return LibcallRoutineNames[Call]; |
3321 | } |
3322 | |
3323 | /// Override the default CondCode to be used to test the result of the |
3324 | /// comparison libcall against zero. |
3325 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
3326 | CmpLibcallCCs[Call] = CC; |
3327 | } |
3328 | |
3329 | /// Get the CondCode that's to be used to test the result of the comparison |
3330 | /// libcall against zero. |
3331 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
3332 | return CmpLibcallCCs[Call]; |
3333 | } |
3334 | |
3335 | /// Set the CallingConv that should be used for the specified libcall. |
3336 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
3337 | LibcallCallingConvs[Call] = CC; |
3338 | } |
3339 | |
3340 | /// Get the CallingConv that should be used for the specified libcall. |
3341 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
3342 | return LibcallCallingConvs[Call]; |
3343 | } |
3344 | |
  /// Execute target specific actions to finalize target lowering.
  /// This is used to set extra flags in MachineFrameInformation and freezing
  /// the set of reserved registers.
  /// The default implementation just freezes the set of reserved registers.
  virtual void finalizeLowering(MachineFunction &MF) const;

  //===----------------------------------------------------------------------===//
  // GlobalISel Hooks
  //===----------------------------------------------------------------------===//
  /// Check whether or not \p MI needs to be moved close to its uses.
  /// NOTE(review): presumably returns true when \p MI should be localized
  /// (rematerialized near its users) — confirm against the implementation.
  virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
3356 | |
3357 | |
private:
  /// The target machine this lowering object was constructed for.
  const TargetMachine &TM;

  /// Tells the code generator that the target has multiple (allocatable)
  /// condition registers that can be used to store the results of comparisons
  /// for use by selects and conditional branches. With multiple condition
  /// registers, the code generator will not aggressively sink comparisons into
  /// the blocks of their users.
  bool HasMultipleConditionRegisters;
3367 | |
3368 | /// Tells the code generator that the target has BitExtract instructions. |
3369 | /// The code generator will aggressively sink "shift"s into the blocks of |
3370 | /// their users if the users will generate "and" instructions which can be |
3371 | /// combined with "shift" to BitExtract instructions. |
3372 | bool ; |
3373 | |
3374 | /// Tells the code generator to bypass slow divide or remainder |
3375 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
3376 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
3377 | /// div/rem when the operands are positive and less than 256. |
3378 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
3379 | |
3380 | /// Tells the code generator that it shouldn't generate extra flow control |
3381 | /// instructions and should attempt to combine flow control instructions via |
3382 | /// predication. |
3383 | bool JumpIsExpensive; |
3384 | |
3385 | /// Information about the contents of the high-bits in boolean values held in |
3386 | /// a type wider than i1. See getBooleanContents. |
3387 | BooleanContent BooleanContents; |
3388 | |
3389 | /// Information about the contents of the high-bits in boolean values held in |
3390 | /// a type wider than i1. See getBooleanContents. |
3391 | BooleanContent BooleanFloatContents; |
3392 | |
3393 | /// Information about the contents of the high-bits in boolean vector values |
3394 | /// when the element type is wider than i1. See getBooleanContents. |
3395 | BooleanContent BooleanVectorContents; |
3396 | |
3397 | /// The target scheduling preference: shortest possible total cycles or lowest |
3398 | /// register usage. |
3399 | Sched::Preference SchedPreferenceInfo; |
3400 | |
3401 | /// The minimum alignment that any argument on the stack needs to have. |
3402 | Align MinStackArgumentAlignment; |
3403 | |
3404 | /// The minimum function alignment (used when optimizing for size, and to |
3405 | /// prevent explicitly provided alignment from leading to incorrect code). |
3406 | Align MinFunctionAlignment; |
3407 | |
3408 | /// The preferred function alignment (used when alignment unspecified and |
3409 | /// optimizing for speed). |
3410 | Align PrefFunctionAlignment; |
3411 | |
  /// The preferred loop alignment (in log2, not in bytes).
3413 | Align PrefLoopAlignment; |
3414 | /// The maximum amount of bytes permitted to be emitted for alignment. |
3415 | unsigned MaxBytesForAlignment; |
3416 | |
3417 | /// Size in bits of the maximum atomics size the backend supports. |
3418 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
3419 | unsigned MaxAtomicSizeInBitsSupported; |
3420 | |
3421 | /// Size in bits of the maximum div/rem size the backend supports. |
3422 | /// Larger operations will be expanded by ExpandLargeDivRem. |
3423 | unsigned MaxDivRemBitWidthSupported; |
3424 | |
  /// Size in bits of the maximum fp convert size the backend
  /// supports. Larger operations will be expanded by ExpandLargeFPConvert.
3427 | unsigned MaxLargeFPConvertBitWidthSupported; |
3428 | |
3429 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
3430 | /// backend supports. |
3431 | unsigned MinCmpXchgSizeInBits; |
3432 | |
3433 | /// This indicates if the target supports unaligned atomic operations. |
3434 | bool SupportsUnalignedAtomics; |
3435 | |
  /// If set to a physical register, this specifies the register that
  /// llvm.stacksave/llvm.stackrestore should save and restore.
3438 | Register StackPointerRegisterToSaveRestore; |
3439 | |
3440 | /// This indicates the default register class to use for each ValueType the |
3441 | /// target supports natively. |
3442 | const TargetRegisterClass *RegClassForVT[MVT::VALUETYPE_SIZE]; |
3443 | uint16_t NumRegistersForVT[MVT::VALUETYPE_SIZE]; |
3444 | MVT RegisterTypeForVT[MVT::VALUETYPE_SIZE]; |
3445 | |
3446 | /// This indicates the "representative" register class to use for each |
3447 | /// ValueType the target supports natively. This information is used by the |
3448 | /// scheduler to track register pressure. By default, the representative |
3449 | /// register class is the largest legal super-reg register class of the |
3450 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
3451 | /// representative class would be GR32. |
3452 | const TargetRegisterClass *RepRegClassForVT[MVT::VALUETYPE_SIZE] = {0}; |
3453 | |
3454 | /// This indicates the "cost" of the "representative" register class for each |
3455 | /// ValueType. The cost is used by the scheduler to approximate register |
3456 | /// pressure. |
3457 | uint8_t RepRegClassCostForVT[MVT::VALUETYPE_SIZE]; |
3458 | |
3459 | /// For any value types we are promoting or expanding, this contains the value |
3460 | /// type that we are changing to. For Expanded types, this contains one step |
3461 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
3462 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
3463 | /// the same type (e.g. i32 -> i32). |
3464 | MVT TransformToType[MVT::VALUETYPE_SIZE]; |
3465 | |
3466 | /// For each operation and each value type, keep a LegalizeAction that |
3467 | /// indicates how instruction selection should deal with the operation. Most |
3468 | /// operations are Legal (aka, supported natively by the target), but |
3469 | /// operations that are not should be described. Note that operations on |
3470 | /// non-legal value types are not described here. |
3471 | LegalizeAction OpActions[MVT::VALUETYPE_SIZE][ISD::BUILTIN_OP_END]; |
3472 | |
3473 | /// For each load extension type and each value type, keep a LegalizeAction |
3474 | /// that indicates how instruction selection should deal with a load of a |
3475 | /// specific value type and extension type. Uses 4-bits to store the action |
3476 | /// for each of the 4 load ext types. |
3477 | uint16_t LoadExtActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3478 | |
3479 | /// For each value type pair keep a LegalizeAction that indicates whether a |
3480 | /// truncating store of a specific value type and truncating type is legal. |
3481 | LegalizeAction TruncStoreActions[MVT::VALUETYPE_SIZE][MVT::VALUETYPE_SIZE]; |
3482 | |
3483 | /// For each indexed mode and each value type, keep a quad of LegalizeAction |
3484 | /// that indicates how instruction selection should deal with the load / |
3485 | /// store / maskedload / maskedstore. |
3486 | /// |
3487 | /// The first dimension is the value_type for the reference. The second |
3488 | /// dimension represents the various modes for load store. |
3489 | uint16_t IndexedModeActions[MVT::VALUETYPE_SIZE][ISD::LAST_INDEXED_MODE]; |
3490 | |
3491 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
3492 | /// indicates how instruction selection should deal with the condition code. |
3493 | /// |
3494 | /// Because each CC action takes up 4 bits, we need to have the array size be |
3495 | /// large enough to fit all of the value types. This can be done by rounding |
3496 | /// up the MVT::VALUETYPE_SIZE value to the next multiple of 8. |
3497 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::VALUETYPE_SIZE + 7) / 8]; |
3498 | |
3499 | ValueTypeActionImpl ValueTypeActions; |
3500 | |
3501 | private: |
3502 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
3503 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
3504 | /// array. |
3505 | unsigned char |
3506 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
3507 | |
3508 | /// For operations that must be promoted to a specific type, this holds the |
3509 | /// destination type. This map should be sparse, so don't hold it as an |
3510 | /// array. |
3511 | /// |
3512 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
3513 | /// this with getTypeToPromoteTo(..). |
3514 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
3515 | PromoteToType; |
3516 | |
  /// Stores the name of each libcall.
3518 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; |
3519 | |
3520 | /// The ISD::CondCode that should be used to test the result of each of the |
3521 | /// comparison libcall against zero. |
3522 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
3523 | |
3524 | /// Stores the CallingConv that should be used for each libcall. |
3525 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
3526 | |
3527 | /// Set default libcall names and calling conventions. |
3528 | void InitLibcalls(const Triple &TT); |
3529 | |
3530 | /// The bits of IndexedModeActions used to store the legalisation actions |
3531 | /// We store the data as | ML | MS | L | S | each taking 4 bits. |
3532 | enum IndexedModeActionsBits { |
3533 | IMAB_Store = 0, |
3534 | IMAB_Load = 4, |
3535 | IMAB_MaskedStore = 8, |
3536 | IMAB_MaskedLoad = 12 |
3537 | }; |
3538 | |
3539 | void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, |
3540 | LegalizeAction Action) { |
3541 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
3542 | (unsigned)Action < 0xf && "Table isn't big enough!" ); |
3543 | unsigned Ty = (unsigned)VT.SimpleTy; |
3544 | IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); |
3545 | IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; |
3546 | } |
3547 | |
3548 | LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, |
3549 | unsigned Shift) const { |
3550 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
3551 | "Table isn't big enough!" ); |
3552 | unsigned Ty = (unsigned)VT.SimpleTy; |
3553 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); |
3554 | } |
3555 | |
3556 | protected: |
3557 | /// Return true if the extension represented by \p I is free. |
3558 | /// \pre \p I is a sign, zero, or fp extension and |
3559 | /// is[Z|FP]ExtFree of the related types is not true. |
3560 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
3561 | |
3562 | /// Depth that GatherAllAliases should continue looking for chain |
3563 | /// dependencies when trying to find a more preferable chain. As an |
3564 | /// approximation, this should be more than the number of consecutive stores |
3565 | /// expected to be merged. |
3566 | unsigned GatherAllAliasesMaxDepth; |
3567 | |
3568 | /// \brief Specify maximum number of store instructions per memset call. |
3569 | /// |
3570 | /// When lowering \@llvm.memset this field specifies the maximum number of |
3571 | /// store operations that may be substituted for the call to memset. Targets |
3572 | /// must set this value based on the cost threshold for that target. Targets |
3573 | /// should assume that the memset will be done using as many of the largest |
3574 | /// store operations first, followed by smaller ones, if necessary, per |
3575 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
3576 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
3577 | /// store. This only applies to setting a constant array of a constant size. |
3578 | unsigned MaxStoresPerMemset; |
3579 | /// Likewise for functions with the OptSize attribute. |
3580 | unsigned MaxStoresPerMemsetOptSize; |
3581 | |
3582 | /// \brief Specify maximum number of store instructions per memcpy call. |
3583 | /// |
3584 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
3585 | /// store operations that may be substituted for a call to memcpy. Targets |
3586 | /// must set this value based on the cost threshold for that target. Targets |
3587 | /// should assume that the memcpy will be done using as many of the largest |
3588 | /// store operations first, followed by smaller ones, if necessary, per |
3589 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
  /// with 32-bit alignment would result in one 4-byte store, one 2-byte store
3591 | /// and one 1-byte store. This only applies to copying a constant array of |
3592 | /// constant size. |
3593 | unsigned MaxStoresPerMemcpy; |
3594 | /// Likewise for functions with the OptSize attribute. |
3595 | unsigned MaxStoresPerMemcpyOptSize; |
3596 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
3597 | /// |
3598 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
3599 | /// of store instructions to keep together. This helps in pairing and |
  /// vectorization later on.
3601 | unsigned MaxGluedStoresPerMemcpy = 0; |
3602 | |
3603 | /// \brief Specify maximum number of load instructions per memcmp call. |
3604 | /// |
3605 | /// When lowering \@llvm.memcmp this field specifies the maximum number of |
3606 | /// pairs of load operations that may be substituted for a call to memcmp. |
3607 | /// Targets must set this value based on the cost threshold for that target. |
3608 | /// Targets should assume that the memcmp will be done using as many of the |
3609 | /// largest load operations first, followed by smaller ones, if necessary, per |
  /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
  /// with 32-bit alignment would result in one 4-byte load, one 2-byte load
  /// and one 1-byte load. This only applies to comparing a constant array of
  /// constant size.
3614 | unsigned MaxLoadsPerMemcmp; |
3615 | /// Likewise for functions with the OptSize attribute. |
3616 | unsigned MaxLoadsPerMemcmpOptSize; |
3617 | |
3618 | /// \brief Specify maximum number of store instructions per memmove call. |
3619 | /// |
3620 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
3621 | /// store instructions that may be substituted for a call to memmove. Targets |
3622 | /// must set this value based on the cost threshold for that target. Targets |
3623 | /// should assume that the memmove will be done using as many of the largest |
3624 | /// store operations first, followed by smaller ones, if necessary, per |
3625 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
3626 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
3627 | /// applies to copying a constant array of constant size. |
3628 | unsigned MaxStoresPerMemmove; |
3629 | /// Likewise for functions with the OptSize attribute. |
3630 | unsigned MaxStoresPerMemmoveOptSize; |
3631 | |
3632 | /// Tells the code generator that select is more expensive than a branch if |
3633 | /// the branch is usually predicted right. |
3634 | bool PredictableSelectIsExpensive; |
3635 | |
3636 | /// \see enableExtLdPromotion. |
3637 | bool EnableExtLdPromotion; |
3638 | |
3639 | /// Return true if the value types that can be represented by the specified |
3640 | /// register class are all legal. |
3641 | bool isLegalRC(const TargetRegisterInfo &TRI, |
3642 | const TargetRegisterClass &RC) const; |
3643 | |
  /// Replace/modify any TargetFrameIndex operands with a target-dependent
  /// sequence of memory operands that is recognized by PrologEpilogInserter.
3646 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
3647 | MachineBasicBlock *MBB) const; |
3648 | |
3649 | bool IsStrictFPEnabled; |
3650 | }; |
3651 | |
3652 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
3653 | /// operators that the target instruction selector can accept natively. |
3654 | /// |
3655 | /// This class also defines callbacks that targets must implement to lower |
3656 | /// target-specific constructs to SelectionDAG operators. |
3657 | class TargetLowering : public TargetLoweringBase { |
3658 | public: |
3659 | struct DAGCombinerInfo; |
3660 | struct MakeLibCallOptions; |
3661 | |
3662 | TargetLowering(const TargetLowering &) = delete; |
3663 | TargetLowering &operator=(const TargetLowering &) = delete; |
3664 | |
3665 | explicit TargetLowering(const TargetMachine &TM); |
3666 | |
3667 | bool isPositionIndependent() const; |
3668 | |
  /// Target hook for divergence analysis: return true if the value produced
  /// by \p N may differ between lanes/threads (presumably consulted when
  /// building uniformity information for the DAG — confirm against callers).
  /// The default reports no SDNode as a source of divergence.
  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
                                          FunctionLoweringInfo *FLI,
                                          UniformityInfo *UA) const {
    return false;
  }
3674 | |
  // Lets the target control the following reassociation of operands:
  // (op (op x, c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is
  // y. By default, any case where N0 has a single use is considered
  // profitable; this mirrors the condition this hook replaced in the
  // DAGCombiner. A target can override this with its own heuristic to
  // restrict the common combiner.
  virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
                                   SDValue N1) const {
    return N0.hasOneUse();
  }
3685 | |
3686 | // Lets target to control the following reassociation of operands: (op (op x, |
3687 | // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By |
3688 | // default consider profitable any case where N0 has single use. This |
3689 | // behavior reflects the condition replaced by this target hook call in the |
3690 | // combiner. Any particular target can implement its own heuristic to |
3691 | // restrict common combiner. |
3692 | virtual bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, |
3693 | Register N1) const { |
3694 | return MRI.hasOneNonDBGUse(RegNo: N0); |
3695 | } |
3696 | |
  /// Target hook for uniformity analysis: return true if the result of \p N
  /// is guaranteed to be uniform across lanes/threads regardless of its
  /// operands. The default makes no such guarantee for any node.
  virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
    return false;
  }
3700 | |
3701 | /// Returns true by value, base pointer and offset pointer and addressing mode |
3702 | /// by reference if the node's address can be legally represented as |
3703 | /// pre-indexed load / store address. |
3704 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
3705 | SDValue &/*Offset*/, |
3706 | ISD::MemIndexedMode &/*AM*/, |
3707 | SelectionDAG &/*DAG*/) const { |
3708 | return false; |
3709 | } |
3710 | |
3711 | /// Returns true by value, base pointer and offset pointer and addressing mode |
3712 | /// by reference if this node can be combined with a load / store to form a |
3713 | /// post-indexed load / store. |
3714 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
3715 | SDValue &/*Base*/, |
3716 | SDValue &/*Offset*/, |
3717 | ISD::MemIndexedMode &/*AM*/, |
3718 | SelectionDAG &/*DAG*/) const { |
3719 | return false; |
3720 | } |
3721 | |
3722 | /// Returns true if the specified base+offset is a legal indexed addressing |
3723 | /// mode for this target. \p MI is the load or store instruction that is being |
3724 | /// considered for transformation. |
3725 | virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, |
3726 | bool IsPre, MachineRegisterInfo &MRI) const { |
3727 | return false; |
3728 | } |
3729 | |
3730 | /// Return the entry encoding for a jump table in the current function. The |
3731 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
3732 | virtual unsigned getJumpTableEncoding() const; |
3733 | |
  /// Produce the MCExpr for one entry of a jump table when the target uses a
  /// custom entry encoding (see getJumpTableEncoding). Targets that report a
  /// custom JTEntryKind must override this; the default implementation
  /// aborts.
  virtual const MCExpr *
  LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
                            const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
                            MCContext &/*Ctx*/) const {
    llvm_unreachable("Need to implement this hook if target has custom JTIs" );
  }
3740 | |
3741 | /// Returns relocation base for the given PIC jumptable. |
3742 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
3743 | SelectionDAG &DAG) const; |
3744 | |
3745 | /// This returns the relocation base for the given PIC jumptable, the same as |
3746 | /// getPICJumpTableRelocBase, but as an MCExpr. |
3747 | virtual const MCExpr * |
3748 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
3749 | unsigned JTI, MCContext &Ctx) const; |
3750 | |
3751 | /// Return true if folding a constant offset with the given GlobalAddress is |
3752 | /// legal. It is frequently not legal in PIC relocation models. |
3753 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
3754 | |
3755 | /// On x86, return true if the operand with index OpNo is a CALL or JUMP |
3756 | /// instruction, which can use either a memory constraint or an address |
3757 | /// constraint. -fasm-blocks "__asm call foo" lowers to |
3758 | /// call void asm sideeffect inteldialect "call ${0:P}", "*m..." |
3759 | /// |
3760 | /// This function is used by a hack to choose the address constraint, |
3761 | /// lowering to a direct call. |
3762 | virtual bool |
3763 | isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs, |
3764 | unsigned OpNo) const { |
3765 | return false; |
3766 | } |
3767 | |
3768 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
3769 | SDValue &Chain) const; |
3770 | |
3771 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3772 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3773 | const SDLoc &DL, const SDValue OldLHS, |
3774 | const SDValue OldRHS) const; |
3775 | |
3776 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
3777 | SDValue &NewRHS, ISD::CondCode &CCCode, |
3778 | const SDLoc &DL, const SDValue OldLHS, |
3779 | const SDValue OldRHS, SDValue &Chain, |
3780 | bool IsSignaling = false) const; |
3781 | |
3782 | /// Returns a pair of (return value, chain). |
3783 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
3784 | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, |
3785 | EVT RetVT, ArrayRef<SDValue> Ops, |
3786 | MakeLibCallOptions CallOptions, |
3787 | const SDLoc &dl, |
3788 | SDValue Chain = SDValue()) const; |
3789 | |
3790 | /// Check whether parameters to a call that are passed in callee saved |
3791 | /// registers are the same as from the calling function. This needs to be |
3792 | /// checked for tail call eligibility. |
3793 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
3794 | const uint32_t *CallerPreservedMask, |
3795 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
3796 | const SmallVectorImpl<SDValue> &OutVals) const; |
3797 | |
3798 | //===--------------------------------------------------------------------===// |
3799 | // TargetLowering Optimization Methods |
3800 | // |
3801 | |
3802 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
3803 | /// returning information from TargetLowering to its clients that want to |
3804 | /// combine. |
3805 | struct TargetLoweringOpt { |
3806 | SelectionDAG &DAG; |
3807 | bool LegalTys; |
3808 | bool LegalOps; |
3809 | SDValue Old; |
3810 | SDValue New; |
3811 | |
3812 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
3813 | bool LT, bool LO) : |
3814 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
3815 | |
3816 | bool LegalTypes() const { return LegalTys; } |
3817 | bool LegalOperations() const { return LegalOps; } |
3818 | |
3819 | bool CombineTo(SDValue O, SDValue N) { |
3820 | Old = O; |
3821 | New = N; |
3822 | return true; |
3823 | } |
3824 | }; |
3825 | |
3826 | /// Determines the optimal series of memory ops to replace the memset / memcpy. |
3827 | /// Return true if the number of memory ops is below the threshold (Limit). |
3828 | /// Note that this is always the case when Limit is ~0. |
3829 | /// It returns the types of the sequence of memory ops to perform |
3830 | /// memset / memcpy by reference. |
3831 | virtual bool |
3832 | findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, |
3833 | const MemOp &Op, unsigned DstAS, unsigned SrcAS, |
3834 | const AttributeList &FuncAttributes) const; |
3835 | |
3836 | /// Check to see if the specified operand of the specified instruction is a |
3837 | /// constant integer. If so, check to see if there are any bits set in the |
3838 | /// constant that are not demanded. If so, shrink the constant and return |
3839 | /// true. |
3840 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3841 | const APInt &DemandedElts, |
3842 | TargetLoweringOpt &TLO) const; |
3843 | |
3844 | /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. |
3845 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, |
3846 | TargetLoweringOpt &TLO) const; |
3847 | |
3848 | // Target hook to do target-specific const optimization, which is called by |
3849 | // ShrinkDemandedConstant. This function should return true if the target |
3850 | // doesn't want ShrinkDemandedConstant to further optimize the constant. |
3851 | virtual bool targetShrinkDemandedConstant(SDValue Op, |
3852 | const APInt &DemandedBits, |
3853 | const APInt &DemandedElts, |
3854 | TargetLoweringOpt &TLO) const { |
3855 | return false; |
3856 | } |
3857 | |
3858 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. |
3859 | /// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, |
3860 | /// but it could be generalized for targets with other types of implicit |
3861 | /// widening casts. |
3862 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, |
3863 | const APInt &DemandedBits, |
3864 | TargetLoweringOpt &TLO) const; |
3865 | |
3866 | /// Look at Op. At this point, we know that only the DemandedBits bits of the |
3867 | /// result of Op are ever used downstream. If we can use this information to |
3868 | /// simplify Op, create a new simplified DAG node and return true, returning |
3869 | /// the original and new nodes in Old and New. Otherwise, analyze the |
3870 | /// expression and return a mask of KnownOne and KnownZero bits for the |
3871 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
3872 | /// be accurate for those bits in the Demanded masks. |
3873 | /// \p AssumeSingleUse When this parameter is true, this function will |
3874 | /// attempt to simplify \p Op even if there are multiple uses. |
3875 | /// Callers are responsible for correctly updating the DAG based on the |
3876 | /// results of this function, because simply replacing TLO.Old |
3877 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
3878 | /// has multiple uses. |
3879 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
3880 | const APInt &DemandedElts, KnownBits &Known, |
3881 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
3882 | bool AssumeSingleUse = false) const; |
3883 | |
3884 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. |
3885 | /// Adds Op back to the worklist upon success. |
3886 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
3887 | KnownBits &Known, TargetLoweringOpt &TLO, |
3888 | unsigned Depth = 0, |
3889 | bool AssumeSingleUse = false) const; |
3890 | |
3891 | /// Helper wrapper around SimplifyDemandedBits. |
3892 | /// Adds Op back to the worklist upon success. |
3893 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
3894 | DAGCombinerInfo &DCI) const; |
3895 | |
3896 | /// Helper wrapper around SimplifyDemandedBits. |
3897 | /// Adds Op back to the worklist upon success. |
3898 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
3899 | const APInt &DemandedElts, |
3900 | DAGCombinerInfo &DCI) const; |
3901 | |
3902 | /// More limited version of SimplifyDemandedBits that can be used to "look |
3903 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - |
3904 | /// bitwise ops etc. |
3905 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
3906 | const APInt &DemandedElts, |
3907 | SelectionDAG &DAG, |
3908 | unsigned Depth = 0) const; |
3909 | |
3910 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
3911 | /// elements. |
3912 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, |
3913 | SelectionDAG &DAG, |
3914 | unsigned Depth = 0) const; |
3915 | |
3916 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all |
3917 | /// bits from only some vector elements. |
3918 | SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, |
3919 | const APInt &DemandedElts, |
3920 | SelectionDAG &DAG, |
3921 | unsigned Depth = 0) const; |
3922 | |
3923 | /// Look at Vector Op. At this point, we know that only the DemandedElts |
3924 | /// elements of the result of Op are ever used downstream. If we can use |
3925 | /// this information to simplify Op, create a new simplified DAG node and |
3926 | /// return true, storing the original and new nodes in TLO. |
3927 | /// Otherwise, analyze the expression and return a mask of KnownUndef and |
3928 | /// KnownZero elements for the expression (used to simplify the caller). |
3929 | /// The KnownUndef/Zero elements may only be accurate for those bits |
3930 | /// in the DemandedMask. |
3931 | /// \p AssumeSingleUse When this parameter is true, this function will |
3932 | /// attempt to simplify \p Op even if there are multiple uses. |
3933 | /// Callers are responsible for correctly updating the DAG based on the |
3934 | /// results of this function, because simply replacing TLO.Old |
3935 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
3936 | /// has multiple uses. |
3937 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, |
3938 | APInt &KnownUndef, APInt &KnownZero, |
3939 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
3940 | bool AssumeSingleUse = false) const; |
3941 | |
3942 | /// Helper wrapper around SimplifyDemandedVectorElts. |
3943 | /// Adds Op back to the worklist upon success. |
3944 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
3945 | DAGCombinerInfo &DCI) const; |
3946 | |
3947 | /// Return true if the target supports simplifying demanded vector elements by |
3948 | /// converting them to undefs. |
3949 | virtual bool |
3950 | shouldSimplifyDemandedVectorElts(SDValue Op, |
3951 | const TargetLoweringOpt &TLO) const { |
3952 | return true; |
3953 | } |
3954 | |
3955 | /// Determine which of the bits specified in Mask are known to be either zero |
3956 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
3957 | /// argument allows us to only collect the known bits that are shared by the |
3958 | /// requested vector elements. |
3959 | virtual void computeKnownBitsForTargetNode(const SDValue Op, |
3960 | KnownBits &Known, |
3961 | const APInt &DemandedElts, |
3962 | const SelectionDAG &DAG, |
3963 | unsigned Depth = 0) const; |
3964 | |
3965 | /// Determine which of the bits specified in Mask are known to be either zero |
3966 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
3967 | /// argument allows us to only collect the known bits that are shared by the |
3968 | /// requested vector elements. This is for GISel. |
3969 | virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, |
3970 | Register R, KnownBits &Known, |
3971 | const APInt &DemandedElts, |
3972 | const MachineRegisterInfo &MRI, |
3973 | unsigned Depth = 0) const; |
3974 | |
3975 | /// Determine the known alignment for the pointer value \p R. This is can |
3976 | /// typically be inferred from the number of low known 0 bits. However, for a |
3977 | /// pointer with a non-integral address space, the alignment value may be |
3978 | /// independent from the known low bits. |
virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
                                              Register R,
                                              const MachineRegisterInfo &MRI,
                                              unsigned Depth = 0) const;

/// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
/// Default implementation computes low bits based on alignment
/// information. This should preserve known bits passed into it.
virtual void computeKnownBitsForFrameIndex(int FIOp,
                                           KnownBits &Known,
                                           const MachineFunction &MF) const;

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner. The DemandedElts
/// argument allows us to only collect the minimum sign bits that are shared
/// by the requested vector elements.
virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 const SelectionDAG &DAG,
                                                 unsigned Depth = 0) const;

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to GlobalISel combiners. The DemandedElts
/// argument allows us to only collect the minimum sign bits that are shared
/// by the requested vector elements.
virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                                  Register R,
                                                  const APInt &DemandedElts,
                                                  const MachineRegisterInfo &MRI,
                                                  unsigned Depth = 0) const;

/// Attempt to simplify any target nodes based on the demanded vector
/// elements, returning true on success. Otherwise, analyze the expression and
/// return a mask of KnownUndef and KnownZero elements for the expression
/// (used to simplify the caller). The KnownUndef/Zero elements may only be
/// accurate for those bits in the DemandedMask.
virtual bool SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;

/// Attempt to simplify any target nodes based on the demanded bits/elts,
/// returning true on success. Otherwise, analyze the
/// expression and return a mask of KnownOne and KnownZero bits for the
/// expression (used to simplify the caller). The KnownZero/One bits may only
/// be accurate for those bits in the Demanded masks.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                               const APInt &DemandedBits,
                                               const APInt &DemandedElts,
                                               KnownBits &Known,
                                               TargetLoweringOpt &TLO,
                                               unsigned Depth = 0) const;

/// More limited version of SimplifyDemandedBits that can be used to "look
/// through" ops that don't contribute to the DemandedBits/DemandedElts -
/// bitwise ops etc.
virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const;

/// Return true if this function can prove that \p Op is never poison
/// and, if \p PoisonOnly is false, does not have undef bits. The DemandedElts
/// argument limits the check to the requested vector elements.
virtual bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, unsigned Depth) const;

/// Return true if Op can create undef or poison from non-undef & non-poison
/// operands. The DemandedElts argument limits the check to the requested
/// vector elements.
virtual bool
canCreateUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts,
                                    const SelectionDAG &DAG, bool PoisonOnly,
                                    bool ConsiderFlags, unsigned Depth) const;

/// Tries to build a legal vector shuffle using the provided parameters
/// or equivalent variations. The Mask argument may be modified as the
/// function tries different variations.
/// Returns an empty SDValue if the operation fails.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                SDValue N1, MutableArrayRef<int> Mask,
                                SelectionDAG &DAG) const;

/// This method returns the constant pool value that will be loaded by LD.
/// NOTE: You must check for implicit extensions of the constant by LD.
virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;

/// If \p SNaN is false, \returns true if \p Op is known to never be any
/// NaN. If \p SNaN is true, returns if \p Op is known to never be a signaling
/// NaN.
virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
                                          const SelectionDAG &DAG,
                                          bool SNaN = false,
                                          unsigned Depth = 0) const;

/// Return true if vector \p Op has the same value across all \p DemandedElts,
/// indicating any elements which may be undef in the output \p UndefElts.
virtual bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                       APInt &UndefElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const;
4079 | |
4080 | /// Returns true if the given Opc is considered a canonical constant for the |
4081 | /// target, which should not be transformed back into a BUILD_VECTOR. |
4082 | virtual bool isTargetCanonicalConstantNode(SDValue Op) const { |
4083 | return Op.getOpcode() == ISD::SPLAT_VECTOR || |
4084 | Op.getOpcode() == ISD::SPLAT_VECTOR_PARTS; |
4085 | } |
4086 | |
4087 | struct DAGCombinerInfo { |
4088 | void *DC; // The DAG Combiner object. |
4089 | CombineLevel Level; |
4090 | bool CalledByLegalizer; |
4091 | |
4092 | public: |
4093 | SelectionDAG &DAG; |
4094 | |
4095 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) |
4096 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} |
4097 | |
4098 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } |
4099 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } |
4100 | bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } |
4101 | CombineLevel getDAGCombineLevel() { return Level; } |
4102 | bool isCalledByLegalizer() const { return CalledByLegalizer; } |
4103 | |
4104 | void AddToWorklist(SDNode *N); |
4105 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); |
4106 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); |
4107 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); |
4108 | |
4109 | bool recursivelyDeleteUnusedNodes(SDNode *N); |
4110 | |
4111 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); |
4112 | }; |
4113 | |
/// Return if the N is a constant or constant vector equal to the true value
/// from getBooleanContents().
bool isConstTrueVal(SDValue N) const;

/// Return if the N is a constant or constant vector equal to the false value
/// from getBooleanContents().
bool isConstFalseVal(SDValue N) const;

/// Return if \p N is a True value when extended to \p VT.
bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;

/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                      bool foldBooleans, DAGCombinerInfo &DCI,
                      const SDLoc &dl) const;

// For targets which wrap address, unwrap for analysis.
virtual SDValue unwrapAddress(SDValue N) const { return N; }

/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
virtual bool
isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;

/// This method will be invoked for all target nodes and for any
/// target-independent nodes that the target has registered with invoke it
/// for.
///
/// The semantics are as follows:
/// Return Value:
///   SDValue.Val == 0 - No change was made
///   SDValue.Val == N - N was replaced, is dead, and is already handled.
///   otherwise        - N should be replaced by the returned Operand.
///
/// In addition, methods provided by DAGCombinerInfo may be used to perform
/// more complex transformations.
///
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
4153 | |
/// Return true if it is profitable to move this shift by a constant amount
/// through its operand, adjusting any immediate operands as necessary to
/// preserve semantics. This transformation may not be desirable if it
/// disrupts a particularly auspicious target-specific tree (e.g. bitfield
/// extraction in AArch64). By default, it returns true.
///
/// @param N the shift node
/// @param Level the current DAGCombine legalization level.
virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                           CombineLevel Level) const {
  return true;
}

/// GlobalISel - return true if it is profitable to move this shift by a
/// constant amount through its operand, adjusting any immediate operands as
/// necessary to preserve semantics. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
/// bitfield extraction in AArch64). By default, it returns true.
///
/// @param MI the shift instruction
/// @param IsAfterLegal true if running after legalization.
virtual bool isDesirableToCommuteWithShift(const MachineInstr &MI,
                                           bool IsAfterLegal) const {
  return true;
}

/// GlobalISel - return true if it's profitable to perform the combine:
/// shl ([sza]ext x), y => zext (shl x, y)
virtual bool isDesirableToPullExtFromShl(const MachineInstr &MI) const {
  return true;
}

// Return AndOrSETCCFoldKind::{AddAnd, ABS} if it's desirable to try and
// optimize LogicOp(SETCC0, SETCC1). An example (what is implemented as of
// writing this) is:
// With C as a power of 2 and C != 0 and C != INT_MIN:
//    AddAnd:
//     (icmp eq A, C) | (icmp eq A, -C)
//            -> (icmp eq and(add(A, C), ~(C + C)), 0)
//     (icmp ne A, C) & (icmp ne A, -C)
//            -> (icmp ne and(add(A, C), ~(C + C)), 0)
//    ABS:
//     (icmp eq A, C) | (icmp eq A, -C)
//            -> (icmp eq Abs(A), C)
//     (icmp ne A, C) & (icmp ne A, -C)
//            -> (icmp ne Abs(A), C)
//
// @param LogicOp the logic op
// @param SETCC0 the first of the SETCC nodes
// @param SETCC1 the second of the SETCC nodes
virtual AndOrSETCCFoldKind isDesirableToCombineLogicOpOfSETCC(
    const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
  return AndOrSETCCFoldKind::None;
}

/// Return true if it is profitable to combine an XOR of a logical shift
/// to create a logical shift of NOT. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
/// BIC on ARM/AArch64). By default, it returns true.
virtual bool isDesirableToCommuteXorWithShift(const SDNode *N) const {
  return true;
}

/// Return true if the target has native support for the specified value type
/// and it is 'desirable' to use the type for the given node type. e.g. On x86
/// i16 is legal, but undesirable since i16 instruction encodings are longer
/// and some i16 instructions are slow.
virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
  // By default, assume all legal types are desirable.
  return isTypeLegal(VT);
}

/// Return true if it is profitable for dag combiner to transform a floating
/// point op of specified opcode to an equivalent op of an integer
/// type. e.g. f32 load -> i32 load can be profitable on ARM.
virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
                                               EVT /*VT*/) const {
  return false;
}

/// This method queries the target whether it is beneficial for dag combiner
/// to promote the specified node. If true, it should return the desired
/// promotion type by reference.
virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
  return false;
}
4240 | |
/// Return true if the target supports swifterror attribute. It optimizes
/// loads and stores to reading and writing a specific register.
virtual bool supportSwiftError() const {
  // Default: no swifterror support; targets opt in by overriding.
  return false;
}

/// Return true if the target supports that a subset of CSRs for the given
/// machine function is handled explicitly via copies.
virtual bool supportSplitCSR(MachineFunction *MF) const {
  return false;
}

/// Return true if the target supports kcfi operand bundles.
virtual bool supportKCFIBundles() const { return false; }

/// Perform necessary initialization to handle a subset of CSRs explicitly
/// via copies. This function is called at the beginning of instruction
/// selection.
// Must be overridden by any target that returns true from supportSplitCSR().
virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
  llvm_unreachable("Not Implemented" );
}

/// Insert explicit copies in entry and exit blocks. We copy a subset of
/// CSRs to virtual registers in the entry block, and copy them back to
/// physical registers in the exit blocks. This function is called at the end
/// of instruction selection.
// Must be overridden by any target that returns true from supportSplitCSR().
virtual void insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  llvm_unreachable("Not Implemented" );
}
4272 | |
/// Return the newly negated expression if the cost is not expensive and
/// set the cost in \p Cost to indicate whether it is cheaper or neutral to
/// do the negation.
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                     bool LegalOps, bool OptForSize,
                                     NegatibleCost &Cost,
                                     unsigned Depth = 0) const;
4280 | |
4281 | SDValue getCheaperOrNeutralNegatedExpression( |
4282 | SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, |
4283 | const NegatibleCost CostThreshold = NegatibleCost::Neutral, |
4284 | unsigned Depth = 0) const { |
4285 | NegatibleCost Cost = NegatibleCost::Expensive; |
4286 | SDValue Neg = |
4287 | getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4288 | if (!Neg) |
4289 | return SDValue(); |
4290 | |
4291 | if (Cost <= CostThreshold) |
4292 | return Neg; |
4293 | |
4294 | // Remove the new created node to avoid the side effect to the DAG. |
4295 | if (Neg->use_empty()) |
4296 | DAG.RemoveDeadNode(N: Neg.getNode()); |
4297 | return SDValue(); |
4298 | } |
4299 | |
4300 | /// This is the helper function to return the newly negated expression only |
4301 | /// when the cost is cheaper. |
4302 | SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, |
4303 | bool LegalOps, bool OptForSize, |
4304 | unsigned Depth = 0) const { |
4305 | return getCheaperOrNeutralNegatedExpression(Op, DAG, LegalOps, OptForSize, |
4306 | CostThreshold: NegatibleCost::Cheaper, Depth); |
4307 | } |
4308 | |
4309 | /// This is the helper function to return the newly negated expression if |
4310 | /// the cost is not expensive. |
4311 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, |
4312 | bool OptForSize, unsigned Depth = 0) const { |
4313 | NegatibleCost Cost = NegatibleCost::Expensive; |
4314 | return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); |
4315 | } |
4316 | |
4317 | //===--------------------------------------------------------------------===// |
4318 | // Lowering methods - These methods must be implemented by targets so that |
4319 | // the SelectionDAGBuilder code knows how to lower these. |
4320 | // |
4321 | |
/// Target-specific splitting of values into parts that fit a register
/// storing a legal type
virtual bool splitValueIntoRegisterParts(
    SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
  // Default: let the generic code perform the split.
  return false;
}

/// Allows the target to handle physreg-carried dependency
/// in target-specific way. Used from the ScheduleDAGSDNodes to decide whether
/// to add the edge to the dependency graph.
/// Def - input: Selection DAG node defining physical register
/// User - input: Selection DAG node using physical register
/// Op - input: Number of User operand
/// PhysReg - inout: set to the physical register if the edge is
/// necessary, unchanged otherwise
/// Cost - inout: physical register copy cost.
/// Returns 'true' if the edge is necessary, 'false' otherwise
virtual bool checkForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                       const TargetRegisterInfo *TRI,
                                       const TargetInstrInfo *TII,
                                       unsigned &PhysReg, int &Cost) const {
  return false;
}

/// Target-specific combining of register parts into its original value
virtual SDValue
joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
                           const SDValue *Parts, unsigned NumParts,
                           MVT PartVT, EVT ValueVT,
                           std::optional<CallingConv::ID> CC) const {
  // Empty SDValue: fall back to the generic join logic.
  return SDValue();
}

/// This hook must be implemented to lower the incoming (formal) arguments,
/// described by the Ins array, into the specified DAG. The implementation
/// should fill in the InVals array with legal-type argument values, and
/// return the resulting token chain value.
virtual SDValue LowerFormalArguments(
    SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
    const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
    SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
  llvm_unreachable("Not Implemented" );
}
4366 | |
4367 | /// This structure contains all information that is necessary for lowering |
4368 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
4369 | /// needs to lower a call, and targets will see this struct in their LowerCall |
4370 | /// implementation. |
4371 | struct CallLoweringInfo { |
4372 | SDValue Chain; |
4373 | Type *RetTy = nullptr; |
4374 | bool RetSExt : 1; |
4375 | bool RetZExt : 1; |
4376 | bool IsVarArg : 1; |
4377 | bool IsInReg : 1; |
4378 | bool DoesNotReturn : 1; |
4379 | bool IsReturnValueUsed : 1; |
4380 | bool IsConvergent : 1; |
4381 | bool IsPatchPoint : 1; |
4382 | bool IsPreallocated : 1; |
4383 | bool NoMerge : 1; |
4384 | |
4385 | // IsTailCall should be modified by implementations of |
4386 | // TargetLowering::LowerCall that perform tail call conversions. |
4387 | bool IsTailCall = false; |
4388 | |
4389 | // Is Call lowering done post SelectionDAG type legalization. |
4390 | bool IsPostTypeLegalization = false; |
4391 | |
4392 | unsigned NumFixedArgs = -1; |
4393 | CallingConv::ID CallConv = CallingConv::C; |
4394 | SDValue Callee; |
4395 | ArgListTy Args; |
4396 | SelectionDAG &DAG; |
4397 | SDLoc DL; |
4398 | const CallBase *CB = nullptr; |
4399 | SmallVector<ISD::OutputArg, 32> Outs; |
4400 | SmallVector<SDValue, 32> OutVals; |
4401 | SmallVector<ISD::InputArg, 32> Ins; |
4402 | SmallVector<SDValue, 4> InVals; |
4403 | const ConstantInt *CFIType = nullptr; |
4404 | |
4405 | CallLoweringInfo(SelectionDAG &DAG) |
4406 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
4407 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
4408 | IsPatchPoint(false), IsPreallocated(false), NoMerge(false), |
4409 | DAG(DAG) {} |
4410 | |
4411 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
4412 | DL = dl; |
4413 | return *this; |
4414 | } |
4415 | |
4416 | CallLoweringInfo &setChain(SDValue InChain) { |
4417 | Chain = InChain; |
4418 | return *this; |
4419 | } |
4420 | |
4421 | // setCallee with target/module-specific attributes |
4422 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
4423 | SDValue Target, ArgListTy &&ArgsList) { |
4424 | RetTy = ResultType; |
4425 | Callee = Target; |
4426 | CallConv = CC; |
4427 | NumFixedArgs = ArgsList.size(); |
4428 | Args = std::move(ArgsList); |
4429 | |
4430 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
4431 | MF: &(DAG.getMachineFunction()), CC, Args); |
4432 | return *this; |
4433 | } |
4434 | |
4435 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
4436 | SDValue Target, ArgListTy &&ArgsList, |
4437 | AttributeSet ResultAttrs = {}) { |
4438 | RetTy = ResultType; |
4439 | IsInReg = ResultAttrs.hasAttribute(Attribute::InReg); |
4440 | RetSExt = ResultAttrs.hasAttribute(Attribute::SExt); |
4441 | RetZExt = ResultAttrs.hasAttribute(Attribute::ZExt); |
4442 | NoMerge = ResultAttrs.hasAttribute(Attribute::NoMerge); |
4443 | |
4444 | Callee = Target; |
4445 | CallConv = CC; |
4446 | NumFixedArgs = ArgsList.size(); |
4447 | Args = std::move(ArgsList); |
4448 | return *this; |
4449 | } |
4450 | |
4451 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
4452 | SDValue Target, ArgListTy &&ArgsList, |
4453 | const CallBase &Call) { |
4454 | RetTy = ResultType; |
4455 | |
4456 | IsInReg = Call.hasRetAttr(Attribute::InReg); |
4457 | DoesNotReturn = |
4458 | Call.doesNotReturn() || |
4459 | (!isa<InvokeInst>(Val: Call) && isa<UnreachableInst>(Val: Call.getNextNode())); |
4460 | IsVarArg = FTy->isVarArg(); |
4461 | IsReturnValueUsed = !Call.use_empty(); |
4462 | RetSExt = Call.hasRetAttr(Attribute::SExt); |
4463 | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
4464 | NoMerge = Call.hasFnAttr(Attribute::NoMerge); |
4465 | |
4466 | Callee = Target; |
4467 | |
4468 | CallConv = Call.getCallingConv(); |
4469 | NumFixedArgs = FTy->getNumParams(); |
4470 | Args = std::move(ArgsList); |
4471 | |
4472 | CB = &Call; |
4473 | |
4474 | return *this; |
4475 | } |
4476 | |
4477 | CallLoweringInfo &setInRegister(bool Value = true) { |
4478 | IsInReg = Value; |
4479 | return *this; |
4480 | } |
4481 | |
4482 | CallLoweringInfo &setNoReturn(bool Value = true) { |
4483 | DoesNotReturn = Value; |
4484 | return *this; |
4485 | } |
4486 | |
4487 | CallLoweringInfo &setVarArg(bool Value = true) { |
4488 | IsVarArg = Value; |
4489 | return *this; |
4490 | } |
4491 | |
4492 | CallLoweringInfo &setTailCall(bool Value = true) { |
4493 | IsTailCall = Value; |
4494 | return *this; |
4495 | } |
4496 | |
4497 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
4498 | IsReturnValueUsed = !Value; |
4499 | return *this; |
4500 | } |
4501 | |
4502 | CallLoweringInfo &setConvergent(bool Value = true) { |
4503 | IsConvergent = Value; |
4504 | return *this; |
4505 | } |
4506 | |
4507 | CallLoweringInfo &setSExtResult(bool Value = true) { |
4508 | RetSExt = Value; |
4509 | return *this; |
4510 | } |
4511 | |
4512 | CallLoweringInfo &setZExtResult(bool Value = true) { |
4513 | RetZExt = Value; |
4514 | return *this; |
4515 | } |
4516 | |
4517 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
4518 | IsPatchPoint = Value; |
4519 | return *this; |
4520 | } |
4521 | |
4522 | CallLoweringInfo &setIsPreallocated(bool Value = true) { |
4523 | IsPreallocated = Value; |
4524 | return *this; |
4525 | } |
4526 | |
4527 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
4528 | IsPostTypeLegalization = Value; |
4529 | return *this; |
4530 | } |
4531 | |
4532 | CallLoweringInfo &setCFIType(const ConstantInt *Type) { |
4533 | CFIType = Type; |
4534 | return *this; |
4535 | } |
4536 | |
4537 | ArgListTy &getArgs() { |
4538 | return Args; |
4539 | } |
4540 | }; |
4541 | |
/// This structure is used to pass arguments to makeLibCall function.
struct MakeLibCallOptions {
  // By passing type list before soften to makeLibCall, the target hook
  // shouldExtendTypeInLibCall can get the original type before soften.
  ArrayRef<EVT> OpsVTBeforeSoften;
  EVT RetVTBeforeSoften;
  bool IsSExt : 1;
  bool DoesNotReturn : 1;
  bool IsReturnValueUsed : 1;
  bool IsPostTypeLegalization : 1;
  bool IsSoften : 1;

  MakeLibCallOptions()
      : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
        IsPostTypeLegalization(false), IsSoften(false) {}

  // Setters return *this so options can be chained fluently.
  MakeLibCallOptions &setSExt(bool Value = true) {
    IsSExt = Value;
    return *this;
  }

  MakeLibCallOptions &setNoReturn(bool Value = true) {
    DoesNotReturn = Value;
    return *this;
  }

  MakeLibCallOptions &setDiscardResult(bool Value = true) {
    IsReturnValueUsed = !Value;
    return *this;
  }

  MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
    IsPostTypeLegalization = Value;
    return *this;
  }

  MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
                                              bool Value = true) {
    OpsVTBeforeSoften = OpsVT;
    RetVTBeforeSoften = RetVT;
    IsSoften = Value;
    return *this;
  }
};
4586 | |
/// This function lowers an abstract call to a function into an actual call.
/// This returns a pair of operands. The first element is the return value
/// for the function (if RetTy is not VoidTy). The second element is the
/// outgoing token chain. It calls LowerCall to do the actual lowering.
std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;

/// This hook must be implemented to lower calls into the specified
/// DAG. The outgoing arguments to the call are described by the Outs array,
/// and the values to be returned by the call are described by the Ins
/// array. The implementation should fill in the InVals array with legal-type
/// return values from the call, and return the resulting token chain value.
virtual SDValue
LowerCall(CallLoweringInfo &/*CLI*/,
          SmallVectorImpl<SDValue> &/*InVals*/) const {
  llvm_unreachable("Not Implemented" );
}
4603 | |
/// Target-specific cleanup for formal ByVal parameters.
virtual void HandleByVal(CCState *, unsigned &, Align) const {}

/// This hook should be implemented to check whether the return values
/// described by the Outs array can fit into the return registers. If false
/// is returned, an sret-demotion is performed.
virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
                            MachineFunction &/*MF*/, bool /*isVarArg*/,
                            const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
                            LLVMContext &/*Context*/) const
{
  // Return true by default to get preexisting behavior.
  return true;
}

/// This hook must be implemented to lower outgoing return values, described
/// by the Outs array, into the specified DAG. The implementation should
/// return the resulting token chain value.
virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
                            bool /*isVarArg*/,
                            const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
                            const SmallVectorImpl<SDValue> & /*OutVals*/,
                            const SDLoc & /*dl*/,
                            SelectionDAG & /*DAG*/) const {
  llvm_unreachable("Not Implemented" );
}
4630 | |
/// Return true if result of the specified node is used by a return node
/// only. It also computes and returns the input chain for the tail call.
///
/// This is used to determine whether it is possible to codegen a libcall as
/// tail call at legalization time.
virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
  return false;
}

/// Return true if the target may be able to emit the call instruction as a
/// tail call. This is used by optimization passes to determine if it's
/// profitable to duplicate return instructions to enable tailcall
/// optimization.
virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
  return false;
}

/// Return the builtin name for the __builtin___clear_cache intrinsic
/// Default is to invoke the clear cache library call
virtual const char * getClearCacheBuiltinName() const {
  return "__clear_cache" ;
}
4652 | |
4653 | /// Return the register ID of the name passed in. Used by named register |
4654 | /// global variables extension. There is no target-independent behaviour |
4655 | /// so the default action is to bail. |
4656 | virtual Register getRegisterByName(const char* RegName, LLT Ty, |
4657 | const MachineFunction &MF) const { |
4658 | report_fatal_error(reason: "Named registers not implemented for this target" ); |
4659 | } |
4660 | |
4661 | /// Return the type that should be used to zero or sign extend a |
4662 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
4663 | /// require the return type to be promoted, but this is not true all the time, |
4664 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
4665 | /// conventions. The frontend should handle this and include all of the |
4666 | /// necessary information. |
4667 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
4668 | ISD::NodeType /*ExtendKind*/) const { |
4669 | EVT MinVT = getRegisterType(MVT::i32); |
4670 | return VT.bitsLT(VT: MinVT) ? MinVT : VT; |
4671 | } |
4672 | |
/// For some targets, an LLVM struct type must be broken down into multiple
/// simple types, but the calling convention specifies that the entire struct
/// must be passed in a block of consecutive registers.
virtual bool
functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
                                          bool isVarArg,
                                          const DataLayout &DL) const {
  return false;
}

/// For most targets, an LLVM type must be broken down into multiple
/// smaller types. Usually the halves are ordered according to the endianness
/// but for some platform that would break. So this method will default to
/// matching the endianness but can be overridden.
virtual bool
shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
  return DL.isLittleEndian();
}

/// Returns a 0 terminated array of registers that can be safely used as
/// scratch registers.
virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
  // nullptr: the target exposes no scratch registers by default.
  return nullptr;
}
4697 | |
4698 | /// Returns a 0 terminated array of rounding control registers that can be |
4699 | /// attached into strict FP call. |
4700 | virtual ArrayRef<MCPhysReg> getRoundingControlRegisters() const { |
4701 | return ArrayRef<MCPhysReg>(); |
4702 | } |
4703 | |
4704 | /// This callback is used to prepare for a volatile or atomic load. |
4705 | /// It takes a chain node as input and returns the chain for the load itself. |
4706 | /// |
4707 | /// Having a callback like this is necessary for targets like SystemZ, |
4708 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
4709 | /// even if a cache-coherent store is performed by another CPU. The default |
4710 | /// implementation does nothing. |
4711 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, |
4712 | SelectionDAG &DAG) const { |
4713 | return Chain; |
4714 | } |
4715 | |
/// This callback is invoked by the type legalizer to legalize nodes with an
/// illegal operand type but legal result types. It replaces the
/// LowerOperation callback in the type Legalizer. The reason we can not do
/// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
/// use this callback.
///
/// TODO: Consider merging with ReplaceNodeResults.
///
/// The target places new result values for the node in Results (their number
/// and types must exactly match those of the original return values of
/// the node), or leaves Results empty, which indicates that the node is not
/// to be custom lowered after all.
/// The default implementation calls LowerOperation.
/// \param N The node whose operand type requires legalization.
/// \param Results Output vector of replacement values (left empty to
///        decline custom lowering).
/// \param DAG The DAG in which replacement nodes should be created.
virtual void LowerOperationWrapper(SDNode *N,
                                   SmallVectorImpl<SDValue> &Results,
                                   SelectionDAG &DAG) const;
4732 | |
/// This callback is invoked for operations that are unsupported by the
/// target, which are registered to use 'custom' lowering, and whose defined
/// values are all legal. If the target has no operations that require custom
/// lowering, it need not implement this. The default implementation of this
/// aborts.
/// \param Op The operation to lower.
/// \param DAG The DAG in which replacement nodes should be created.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
4739 | |
4740 | /// This callback is invoked when a node result type is illegal for the |
4741 | /// target, and the operation was registered to use 'custom' lowering for that |
4742 | /// result type. The target places new result values for the node in Results |
4743 | /// (their number and types must exactly match those of the original return |
4744 | /// values of the node), or leaves Results empty, which indicates that the |
4745 | /// node is not to be custom lowered after all. |
4746 | /// |
4747 | /// If the target has no operations that require custom lowering, it need not |
4748 | /// implement this. The default implementation aborts. |
4749 | virtual void ReplaceNodeResults(SDNode * /*N*/, |
4750 | SmallVectorImpl<SDValue> &/*Results*/, |
4751 | SelectionDAG &/*DAG*/) const { |
4752 | llvm_unreachable("ReplaceNodeResults not implemented for this target!" ); |
4753 | } |
4754 | |
/// This method returns the name of a target specific DAG node.
/// \param Opcode The target-specific opcode to name.
virtual const char *getTargetNodeName(unsigned Opcode) const;

/// This method returns a target specific FastISel object, or null if the
/// target does not support "fast" ISel.
virtual FastISel *createFastISel(FunctionLoweringInfo &,
                                 const TargetLibraryInfo *) const {
  return nullptr;
}

/// Verify that the argument of a return-address-style node \p Op is a
/// constant. NOTE(review): summary inferred from the name; see the
/// out-of-line definition for the exact diagnostic behavior.
bool verifyReturnAddressArgumentIsConstant(SDValue Op,
                                           SelectionDAG &DAG) const;
4767 | |
4768 | //===--------------------------------------------------------------------===// |
4769 | // Inline Asm Support hooks |
4770 | // |
4771 | |
/// This hook allows the target to expand an inline asm call to be explicit
/// llvm code if it wants to. This is useful for turning simple inline asms
/// into LLVM intrinsics, which gives the compiler more information about the
/// behavior of the code.
/// \returns true if the call was rewritten; the default implementation
/// performs no expansion and returns false.
virtual bool ExpandInlineAsm(CallInst *) const {
  return false;
}
4779 | |
/// Kinds of inline asm operand constraint, as classified by
/// getConstraintType().
enum ConstraintType {
  C_Register,      // Constraint represents specific register(s).
  C_RegisterClass, // Constraint represents any of register(s) in class.
  C_Memory,        // Memory constraint.
  C_Address,       // Address constraint.
  C_Immediate,     // Requires an immediate.
  C_Other,         // Something else.
  C_Unknown        // Unsupported constraint.
};
4789 | |
/// Weights used to rank how well an operand matches an inline asm
/// constraint; a higher value is a better match.
enum ConstraintWeight {
  // Generic weights.
  CW_Invalid  = -1,     // No match.
  CW_Okay     = 0,      // Acceptable.
  CW_Good     = 1,      // Good weight.
  CW_Better   = 2,      // Better weight.
  CW_Best     = 3,      // Best weight.

  // Well-known weights.
  CW_SpecificReg  = CW_Okay,    // Specific register operands.
  CW_Register     = CW_Good,    // Register operands.
  CW_Memory       = CW_Better,  // Memory operands.
  CW_Constant     = CW_Best,    // Constant operand.
  CW_Default      = CW_Okay     // Default or don't know type.
};
4805 | |
/// This contains information for each constraint that we are lowering.
struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
  /// This contains the actual string for the code, like "m". TargetLowering
  /// picks the 'best' code from ConstraintInfo::Codes that most closely
  /// matches the operand.
  std::string ConstraintCode;

  /// Information about the constraint code, e.g. Register, RegisterClass,
  /// Memory, Other, Unknown.
  TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;

  /// If this is the result output operand or a clobber, this is null,
  /// otherwise it is the incoming operand to the CallInst. This gets
  /// modified as the asm is processed.
  Value *CallOperandVal = nullptr;

  /// The ValueType for the operand value.
  MVT ConstraintVT = MVT::Other;

  /// Copy constructor for copying from a ConstraintInfo.
  AsmOperandInfo(InlineAsm::ConstraintInfo Info)
      : InlineAsm::ConstraintInfo(std::move(Info)) {}

  /// Return true if this is an input operand that is a matching constraint
  /// like "4".
  bool isMatchingInputConstraint() const;

  /// If this is an input matching constraint, this method returns the output
  /// operand it matches.
  unsigned getMatchedOperand() const;
};
4837 | |
using AsmOperandInfoVector = std::vector<AsmOperandInfo>;

/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values. If this returns an empty vector, and if the constraint
/// string itself isn't empty, there was an error parsing.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
                                              const TargetRegisterInfo *TRI,
                                              const CallBase &Call) const;

/// Examine constraint type and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
virtual ConstraintWeight getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const;

/// Examine constraint string and operand type and determine a weight value.
/// The operand object must already have been set up with the operand type.
virtual ConstraintWeight getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const;

/// Determines the constraint code and constraint type to use for the
/// specific AsmOperandInfo, setting OpInfo.ConstraintCode and
/// OpInfo.ConstraintType. If the actual operand being passed in is
/// available, it can be passed in as Op, otherwise an empty SDValue can be
/// passed.
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                    SDValue Op,
                                    SelectionDAG *DAG = nullptr) const;

/// Given a constraint, return the type of constraint it is for this target.
virtual ConstraintType getConstraintType(StringRef Constraint) const;

/// A (constraint code, constraint type) pair.
using ConstraintPair = std::pair<StringRef, TargetLowering::ConstraintType>;
using ConstraintGroup = SmallVector<ConstraintPair>;
/// Given an OpInfo with list of constraints codes as strings, return a
/// sorted Vector of pairs of constraint codes and their types in priority of
/// what we'd prefer to lower them as. This may contain immediates that
/// cannot be lowered, but it is meant to be a machine agnostic order of
/// preferences.
ConstraintGroup getConstraintPreferences(AsmOperandInfo &OpInfo) const;
4877 | |
/// Given a physical register constraint (e.g. {edx}), return the register
/// number and the register class for the register.
///
/// Given a register class constraint, like 'r', if this corresponds directly
/// to an LLVM register class, return a register of 0 and the register class
/// pointer.
///
/// This should only be used for C_Register constraints. On error, this
/// returns a register number of 0 and a null register class pointer.
/// \param TRI The target's register info, used to resolve register names and
///        classes.
virtual std::pair<unsigned, const TargetRegisterClass *>
getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                             StringRef Constraint, MVT VT) const;
4890 | |
4891 | virtual InlineAsm::ConstraintCode |
4892 | getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
4893 | if (ConstraintCode == "m" ) |
4894 | return InlineAsm::ConstraintCode::m; |
4895 | if (ConstraintCode == "o" ) |
4896 | return InlineAsm::ConstraintCode::o; |
4897 | if (ConstraintCode == "X" ) |
4898 | return InlineAsm::ConstraintCode::X; |
4899 | if (ConstraintCode == "p" ) |
4900 | return InlineAsm::ConstraintCode::p; |
4901 | return InlineAsm::ConstraintCode::Unknown; |
4902 | } |
4903 | |
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand. This returns null if there is no replacement to make.
virtual const char *LowerXConstraint(EVT ConstraintVT) const;

/// Lower the specified operand into the Ops vector. If it is invalid, don't
/// add anything to Ops.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                          std::vector<SDValue> &Ops,
                                          SelectionDAG &DAG) const;

/// Lower custom output constraints. If invalid, return SDValue().
virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Glue,
                                            const SDLoc &DL,
                                            const AsmOperandInfo &OpInfo,
                                            SelectionDAG &DAG) const;

/// Targets may override this function to collect operands from the CallInst
/// and for example, lower them into the SelectionDAG operands.
virtual void CollectTargetIntrinsicOperands(const CallInst &I,
                                            SmallVectorImpl<SDValue> &Ops,
                                            SelectionDAG &DAG) const;
4926 | |
4927 | //===--------------------------------------------------------------------===// |
4928 | // Div utility functions |
4929 | // |
4930 | |
/// Build the DAG expansion of an SDIV node. Any nodes created during the
/// expansion are appended to \p Created.
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                  SmallVectorImpl<SDNode *> &Created) const;
/// Build the DAG expansion of a UDIV node. Any nodes created during the
/// expansion are appended to \p Created.
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
                  SmallVectorImpl<SDNode *> &Created) const;
/// Build sdiv by power-of-2 with conditional move instructions.
SDValue buildSDIVPow2WithCMov(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;

/// Targets may override this function to provide custom SDIV lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
/// assumes SDIV is expensive and replaces it with a series of other integer
/// operations.
virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;

/// Targets may override this function to provide custom SREM lowering for
/// power-of-2 denominators. If the target returns an empty SDValue, LLVM
/// assumes SREM is expensive and replaces it with a series of other integer
/// operations.
virtual SDValue BuildSREMPow2(SDNode *N, const APInt &Divisor,
                              SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) const;
4955 | |
/// Indicate whether this target prefers to combine FDIVs with the same
/// divisor. If the transform should never be done, return zero. If the
/// transform should be done, return the minimum number of divisor uses
/// that must exist. The default of 0 disables the combine.
virtual unsigned combineRepeatedFPDivisors() const {
  return 0;
}
4963 | |
4964 | /// Hooks for building estimates in place of slower divisions and square |
4965 | /// roots. |
4966 | |
/// Return either a square root or its reciprocal estimate value for the
/// input operand.
/// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
/// 'Enabled' as set by a potential default override attribute.
/// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
/// refinement iterations required to generate a sufficient (though not
/// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// The boolean UseOneConstNR output is used to select a Newton-Raphson
/// algorithm implementation that uses either one or two constants.
/// The boolean Reciprocal is used to select whether the estimate is for the
/// square root of the input operand or the reciprocal of its square root.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created (this
/// is the default).
virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                int Enabled, int &RefinementSteps,
                                bool &UseOneConstNR, bool Reciprocal) const {
  return SDValue();
}
4987 | |
/// Try to convert the fminnum/fmaxnum to a compare/select sequence. This is
/// required for correctness since InstCombine might have canonicalized a
/// fcmp+select sequence to a FMINNUM/FMAXNUM intrinsic. If we were to fall
/// through to the default expansion/soften to libcall, we might introduce a
/// link-time dependency on libm into a file that originally did not have one.
/// \param Node The FMINNUM/FMAXNUM node to convert.
SDValue createSelectForFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const;
4994 | |
/// Return a reciprocal estimate value for the input operand.
/// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
/// 'Enabled' as set by a potential default override attribute.
/// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
/// refinement iterations required to generate a sufficient (though not
/// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created (this
/// is the default).
virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                 int Enabled, int &RefinementSteps) const {
  return SDValue();
}
5009 | |
/// Return a target-dependent comparison result if the input operand is
/// suitable for use with a square root estimate calculation. For example, the
/// comparison may check if the operand is NAN, INF, zero, normal, etc. The
/// result should be used as the condition operand for a select or branch.
/// \param Mode The denormal handling mode in effect (see DenormalMode).
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
                                 const DenormalMode &Mode) const;
5016 | |
5017 | /// Return a target-dependent result if the input operand is not suitable for |
5018 | /// use with a square root estimate calculation. |
5019 | virtual SDValue getSqrtResultForDenormInput(SDValue Operand, |
5020 | SelectionDAG &DAG) const { |
5021 | return DAG.getConstantFP(Val: 0.0, DL: SDLoc(Operand), VT: Operand.getValueType()); |
5022 | } |
5023 | |
5024 | //===--------------------------------------------------------------------===// |
5025 | // Legalization utility functions |
5026 | // |
5027 | |
/// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
/// respectively, each computing an n/2-bit part of the result.
/// \param Result A vector that will be filled with the parts of the result
///        in little-endian order.
/// \param LL Low bits of the LHS of the MUL. You can use this parameter
///        if you want to control how low bits are extracted from the LHS.
/// \param LH High bits of the LHS of the MUL. See LL for meaning.
/// \param RL Low bits of the RHS of the MUL. See LL for meaning.
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
                    SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
                    SelectionDAG &DAG, MulExpansionKind Kind,
                    SDValue LL = SDValue(), SDValue LH = SDValue(),
                    SDValue RL = SDValue(), SDValue RH = SDValue()) const;

/// Expand a MUL into two nodes. One that computes the high bits of
/// the result and one that computes the low bits.
/// \param HiLoVT The value type to use for the Lo and Hi nodes.
/// \param LL Low bits of the LHS of the MUL. You can use this parameter
///        if you want to control how low bits are extracted from the LHS.
/// \param LH High bits of the LHS of the MUL. See LL for meaning.
/// \param RL Low bits of the RHS of the MUL. See LL for meaning.
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
               SelectionDAG &DAG, MulExpansionKind Kind,
               SDValue LL = SDValue(), SDValue LH = SDValue(),
               SDValue RL = SDValue(), SDValue RH = SDValue()) const;
5057 | |
/// Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit
/// urem by constant and other arithmetic ops. The n/2-bit urem by constant
/// will be expanded by DAGCombiner. This is not possible for all constant
/// divisors.
/// \param N Node to expand
/// \param Result A vector that will be filled with the low and high parts of
///        the results. For *DIVREM, this will be the quotient parts followed
///        by the remainder parts.
/// \param HiLoVT The value type to use for the Lo and Hi parts. Should be
///        half of VT.
/// \param LL Low bits of the LHS of the operation. You can use this
///        parameter if you want to control how low bits are extracted from
///        the LHS.
/// \param LH High bits of the LHS of the operation. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl<SDValue> &Result,
                            EVT HiLoVT, SelectionDAG &DAG,
                            SDValue LL = SDValue(),
                            SDValue LH = SDValue()) const;
5077 | |
/// Expand funnel shift.
/// \param N Node to expand
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;

/// Expand rotations.
/// \param N Node to expand
/// \param AllowVectorOps expand vector rotate, this should only be performed
///        if the legalization is happening outside of LegalizeVectorOps
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const;

/// Expand shift-by-parts.
/// \param N Node to expand
/// \param Lo lower-output-part after conversion
/// \param Hi upper-output-part after conversion
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi,
                      SelectionDAG &DAG) const;

/// Expand float to SINT conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

/// Expand float to UINT conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
                      SelectionDAG &DAG) const;

/// Expand UINT to FP conversion.
/// \param N Node to expand
/// \param Result output after conversion
/// \param Chain output chain after conversion
/// \returns True, if the expansion was successful, false otherwise
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
                      SelectionDAG &DAG) const;

/// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;

/// Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
/// \param N Node to expand
/// \returns The expansion result
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const;

/// Expand check for floating point class.
/// \param ResultVT The type of intrinsic call result.
/// \param Op The tested value.
/// \param Test The test to perform.
/// \param Flags The optimization flags.
/// \returns The expansion result or SDValue() if it fails.
SDValue expandIS_FPCLASS(EVT ResultVT, SDValue Op, FPClassTest Test,
                         SDNodeFlags Flags, const SDLoc &DL,
                         SelectionDAG &DAG) const;
5136 | |
/// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTPOP nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const;

/// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand CTTZ via Table Lookup.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue CTTZTableLookup(SDNode *N, SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                        SDValue Op, unsigned NumBitsPerElt) const;

/// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const;
5174 | |
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
/// vector nodes can only succeed if all operations are legal/custom.
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
/// \param N Node to expand
/// \param IsNegative indicate negated abs
/// \returns The expansion result or SDValue() if it fails.
SDValue expandABS(SDNode *N, SelectionDAG &DAG,
                  bool IsNegative = false) const;

/// Expand ABDS/ABDU nodes. Expands vector/scalar ABDS/ABDU nodes.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const;

/// Expand BSWAP nodes. Expands scalar/vector BSWAP nodes with i16/i32/i64
/// scalar types. Returns SDValue() if expand fails.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_BSWAP nodes. Expands VP_BSWAP nodes with i16/i32/i64 scalar
/// types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const;

/// Expand BITREVERSE nodes. Expands scalar/vector BITREVERSE nodes.
/// Returns SDValue() if expand fails.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const;

/// Expand VP_BITREVERSE nodes. Expands VP_BITREVERSE nodes with
/// i8/i16/i32/i64 scalar types.
/// \param N Node to expand
/// \returns The expansion result or SDValue() if it fails.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const;

/// Turn load of vector type into a load of the individual elements.
/// \param LD load to expand
/// \returns BUILD_VECTOR and TokenFactor nodes.
std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
                                                SelectionDAG &DAG) const;

/// Turn a store of a vector type into stores of the individual elements.
/// \param ST Store with a vector value type
/// \returns TokenFactor of the individual store chains.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;

/// Expands an unaligned load to 2 half-size loads for an integer, and
/// possibly more for vectors.
std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                SelectionDAG &DAG) const;

/// Expands an unaligned store to 2 half-size stores for integer values, and
/// possibly more for vectors.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
5230 | |
/// Increments memory address \p Addr according to the type of the value
/// \p DataVT that should be stored. If the data is stored in compressed
/// form, the memory address should be incremented according to the number of
/// the stored elements. This number is equal to the number of '1's bits
/// in the \p Mask.
/// \p DataVT is a vector type. \p Mask is a vector value.
/// \p DataVT and \p Mask have the same number of vector elements.
SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                               EVT DataVT, SelectionDAG &DAG,
                               bool IsCompressedMemory) const;

/// Get a pointer to vector element \p Index located in memory for a vector
/// of type \p VecVT starting at a base address of \p VecPtr. If \p Index is
/// out of bounds the returned pointer is unspecified, but will be within the
/// vector bounds.
SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                SDValue Index) const;

/// Get a pointer to a sub-vector of type \p SubVecVT at index \p Index
/// located in memory for a vector of type \p VecVT starting at a base
/// address of \p VecPtr. If \p Index plus the size of \p SubVecVT is out of
/// bounds the returned pointer is unspecified, but the value returned will
/// be such that the entire subvector would be within the vector bounds.
SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                               EVT SubVecVT, SDValue Index) const;
5256 | |
/// Method for building the DAG expansion of ISD::[US][MIN|MAX]. This
/// method accepts integers as its arguments.
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
/// method accepts integers as its arguments.
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US]SHLSAT. This
/// method accepts integers as its arguments.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
/// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
/// method accepts integers as its arguments.
/// Note: This method may fail if the division could not be performed
/// within the type. Clients must retry with a wider type if this happens.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                            SDValue LHS, SDValue RHS,
                            unsigned Scale, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                    SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
/// always succeeds and populates the Result and Overflow arguments.
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                    SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
/// expansion was successful and populates the Result and Overflow arguments.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                SelectionDAG &DAG) const;
5295 | |
/// forceExpandWideMUL - Unconditionally expand a MUL into either a libcall or
/// brute force via a wide multiplication. The expansion works by
/// attempting to do a multiplication on a wider type twice the size of the
/// original operands. LL and LH represent the lower and upper halves of the
/// first operand. RL and RH represent the lower and upper halves of the
/// second operand. The upper and lower halves of the result are stored in Lo
/// and Hi.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                        EVT WideVT, const SDValue LL, const SDValue LH,
                        const SDValue RL, const SDValue RH, SDValue &Lo,
                        SDValue &Hi) const;

/// Same as above, but creates the upper halves of each operand by
/// sign/zero-extending the operands.
void forceExpandWideMUL(SelectionDAG &DAG, const SDLoc &dl, bool Signed,
                        const SDValue LHS, const SDValue RHS, SDValue &Lo,
                        SDValue &Hi) const;

/// Expand a VECREDUCE_* into an explicit calculation.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;

/// Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const;

/// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
/// Returns true if the expansion was successful.
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;

/// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
/// method accepts vectors as its arguments.
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
5328 | |
/// Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC
/// on the current target. A VP_SETCC will additionally be given a Mask
/// and/or EVL not equal to SDValue().
///
/// If the SETCC has been legalized using AND / OR, then the legalized node
/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
/// will be set to false. This will also hold if the VP_SETCC has been
/// legalized using VP_AND / VP_OR.
///
/// If the SETCC / VP_SETCC has been legalized by using
/// getSetCCSwappedOperands(), then the values of LHS and RHS will be
/// swapped, CC will be set to the new condition, and NeedInvert will be set
/// to false.
///
/// If the SETCC / VP_SETCC has been legalized using the inverse condcode,
/// then LHS and RHS will be unchanged, CC will be set to the inverted
/// condcode, and NeedInvert will be set to true. The caller must invert the
/// result of the SETCC with SelectionDAG::getLogicalNOT() or take equivalent
/// action to swap the effect of a true/false result.
///
/// NOTE(review): \p Chain appears to receive any chain produced during
/// legalization, and \p IsSignaling presumably selects signaling FP
/// comparison semantics — confirm both against the implementation.
///
/// \returns true if the SETCC / VP_SETCC has been legalized, false if it
/// hasn't.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS,
                           SDValue &RHS, SDValue &CC, SDValue Mask,
                           SDValue EVL, bool &NeedInvert, const SDLoc &dl,
                           SDValue &Chain, bool IsSignaling = false) const;
5355 | |
5356 | //===--------------------------------------------------------------------===// |
5357 | // Instruction Emitting Hooks |
5358 | // |
5359 | |
/// This method should be implemented by targets that mark instructions with
/// the 'usesCustomInserter' flag. These instructions are special in various
/// ways, which require special support to insert. The specified MachineInstr
/// is created but not inserted into any basic blocks, and this method is
/// called to expand it into a sequence of instructions, potentially also
/// creating new basic blocks and control flow.
/// As long as the returned basic block is different (i.e., we created a new
/// one), the custom inserter is free to modify the rest of \p MBB.
/// \returns the basic block where emission should continue (inferred from
/// the "returned basic block is different" contract above — confirm).
virtual MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
5370 | |
/// This method should be implemented by targets that mark instructions with
/// the 'hasPostISelHook' flag. These instructions must be adjusted after
/// instruction selection by target hooks, e.g., to fill in optional defs for
/// ARM 's' setting instructions.
/// \p Node is presumably the SDNode that \p MI was selected from — confirm
/// against callers.
virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                           SDNode *Node) const;
5377 | |
5378 | /// If this function returns true, SelectionDAGBuilder emits a |
5379 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
5380 | virtual bool useLoadStackGuardNode() const { |
5381 | return false; |
5382 | } |
5383 | |
5384 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
5385 | const SDLoc &DL) const { |
5386 | llvm_unreachable("not implemented for this target" ); |
5387 | } |
5388 | |
/// Lower TLS global address SDNode for target independent emulated TLS model.
/// \p GA is the TLS global-address node to lower.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const;
5392 | |
/// Expands target specific indirect branch for the case of JumpTable
/// expansion.
/// \p Value is the value to branch on and \p Addr the branch address;
/// \p JTI is presumably the index of the jump table being expanded —
/// confirm against callers.
virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                       SDValue Addr, int JTI,
                                       SelectionDAG &DAG) const;
5398 | |
/// seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
/// If we're comparing for equality to zero and isCtlzFast is true, expose the
/// fact that this can be implemented as a ctlz/srl pair, so that the dag
/// combiner can fold the new nodes.
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
5404 | |
/// Return true if `X & Y eq/ne 0` is preferable to `X & Y ne/eq Y`.
virtual bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode, EVT) const {
return true;
}
5409 | |
private:
// Attempts a setcc fold where one operand involves an AND.
// NOTE(review): semantics inferred from the name — confirm against the
// definition in TargetLowering.cpp.
SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                         const SDLoc &DL, DAGCombinerInfo &DCI) const;
// As above, but for a general binary operator in the compared expression
// (inferred from the name — confirm).
SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;

// Optimizes a setcc implementing a signed-truncation check (inferred from
// the name — confirm in the implementation).
SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                             SDValue N1, ISD::CondCode Cond,
                                             DAGCombinerInfo &DCI,
                                             const SDLoc &DL) const;

// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const;

// prepare/build pair for folding an unsigned-remainder equality check into
// a multiplication-based test; \p Created collects nodes built during
// preparation. NOTE(review): presumably the Granlund–Montgomery
// division-by-invariant-multiplication technique — confirm in
// TargetLowering.cpp.
SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                          SDValue CompTargetNode, ISD::CondCode Cond,
                          DAGCombinerInfo &DCI, const SDLoc &DL,
                          SmallVectorImpl<SDNode *> &Created) const;
SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                        ISD::CondCode Cond, DAGCombinerInfo &DCI,
                        const SDLoc &DL) const;

// Signed counterparts of the UREM equality-fold helpers above.
SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                          SDValue CompTargetNode, ISD::CondCode Cond,
                          DAGCombinerInfo &DCI, const SDLoc &DL,
                          SmallVectorImpl<SDNode *> &Created) const;
SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                        ISD::CondCode Cond, DAGCombinerInfo &DCI,
                        const SDLoc &DL) const;
};
5442 | |
/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
/// \p Outs is populated with the computed ISD::OutputArg entries.
/// NOTE(review): this signature has no out-parameter for the "offsets"
/// mentioned above — that part of the comment may be stale; confirm.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);
5449 | |
5450 | } // end namespace llvm |
5451 | |
5452 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |
5453 | |