1 | //===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This implements the TargetLoweringBase class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/BitVector.h" |
14 | #include "llvm/ADT/STLExtras.h" |
15 | #include "llvm/ADT/SmallVector.h" |
16 | #include "llvm/ADT/StringExtras.h" |
17 | #include "llvm/ADT/StringRef.h" |
18 | #include "llvm/ADT/Twine.h" |
19 | #include "llvm/Analysis/Loads.h" |
20 | #include "llvm/Analysis/TargetTransformInfo.h" |
21 | #include "llvm/CodeGen/Analysis.h" |
22 | #include "llvm/CodeGen/ISDOpcodes.h" |
23 | #include "llvm/CodeGen/MachineBasicBlock.h" |
24 | #include "llvm/CodeGen/MachineFrameInfo.h" |
25 | #include "llvm/CodeGen/MachineFunction.h" |
26 | #include "llvm/CodeGen/MachineInstr.h" |
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | #include "llvm/CodeGen/MachineMemOperand.h" |
29 | #include "llvm/CodeGen/MachineOperand.h" |
30 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
31 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
32 | #include "llvm/CodeGen/StackMaps.h" |
33 | #include "llvm/CodeGen/TargetLowering.h" |
34 | #include "llvm/CodeGen/TargetOpcodes.h" |
35 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
36 | #include "llvm/CodeGen/ValueTypes.h" |
37 | #include "llvm/CodeGenTypes/MachineValueType.h" |
38 | #include "llvm/IR/Attributes.h" |
39 | #include "llvm/IR/CallingConv.h" |
40 | #include "llvm/IR/DataLayout.h" |
41 | #include "llvm/IR/DerivedTypes.h" |
42 | #include "llvm/IR/Function.h" |
43 | #include "llvm/IR/GlobalValue.h" |
44 | #include "llvm/IR/GlobalVariable.h" |
45 | #include "llvm/IR/IRBuilder.h" |
46 | #include "llvm/IR/Module.h" |
47 | #include "llvm/IR/Type.h" |
48 | #include "llvm/Support/Casting.h" |
49 | #include "llvm/Support/CommandLine.h" |
50 | #include "llvm/Support/Compiler.h" |
51 | #include "llvm/Support/ErrorHandling.h" |
52 | #include "llvm/Support/MathExtras.h" |
53 | #include "llvm/Target/TargetMachine.h" |
54 | #include "llvm/Target/TargetOptions.h" |
55 | #include "llvm/TargetParser/Triple.h" |
56 | #include "llvm/Transforms/Utils/SizeOpts.h" |
57 | #include <algorithm> |
58 | #include <cassert> |
59 | #include <cstdint> |
60 | #include <cstring> |
61 | #include <iterator> |
62 | #include <string> |
63 | #include <tuple> |
64 | #include <utility> |
65 | |
66 | using namespace llvm; |
67 | |
68 | static cl::opt<bool> JumpIsExpensiveOverride( |
69 | "jump-is-expensive" , cl::init(Val: false), |
70 | cl::desc("Do not create extra branches to split comparison logic." ), |
71 | cl::Hidden); |
72 | |
73 | static cl::opt<unsigned> MinimumJumpTableEntries |
74 | ("min-jump-table-entries" , cl::init(Val: 4), cl::Hidden, |
75 | cl::desc("Set minimum number of entries to use a jump table." )); |
76 | |
77 | static cl::opt<unsigned> MaximumJumpTableSize |
78 | ("max-jump-table-size" , cl::init(UINT_MAX), cl::Hidden, |
79 | cl::desc("Set maximum size of jump tables." )); |
80 | |
81 | /// Minimum jump table density for normal functions. |
82 | static cl::opt<unsigned> |
83 | JumpTableDensity("jump-table-density" , cl::init(Val: 10), cl::Hidden, |
84 | cl::desc("Minimum density for building a jump table in " |
85 | "a normal function" )); |
86 | |
87 | /// Minimum jump table density for -Os or -Oz functions. |
88 | static cl::opt<unsigned> OptsizeJumpTableDensity( |
89 | "optsize-jump-table-density" , cl::init(Val: 40), cl::Hidden, |
90 | cl::desc("Minimum density for building a jump table in " |
91 | "an optsize function" )); |
92 | |
// FIXME: This option exists only to test whether strict FP operations are
// processed correctly, by preventing the mutation of strict FP operations
// into normal FP operations during development. Once the backend supports
// strict FP operations, this option will be meaningless.
static cl::opt<bool> DisableStrictNodeMutation("disable-strictnode-mutation",
       cl::desc("Don't mutate strict-float node to a legalize node"),
       cl::init(false), cl::Hidden);
100 | |
101 | static bool darwinHasSinCos(const Triple &TT) { |
  assert(TT.isOSDarwin() && "should be called with darwin triple");
  // Don't bother with 32-bit x86.
  if (TT.getArch() == Triple::x86)
    return false;
  // macOS < 10.9 has no sincos_stret.
  if (TT.isMacOSX())
    return !TT.isMacOSXVersionLT(10, 9) && TT.isArch64Bit();
  // iOS < 7.0 has no sincos_stret.
  if (TT.isiOS())
    return !TT.isOSVersionLT(7, 0);
  // Any other Darwin OS, such as watchOS or tvOS, is new enough.
  return true;
114 | } |
115 | |
116 | void TargetLoweringBase::InitLibcalls(const Triple &TT) { |
117 | #define HANDLE_LIBCALL(code, name) \ |
118 | setLibcallName(RTLIB::code, name); |
119 | #include "llvm/IR/RuntimeLibcalls.def" |
120 | #undef HANDLE_LIBCALL |
121 | // Initialize calling conventions to their default. |
122 | for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC) |
    setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
124 | |
125 | // Use the f128 variants of math functions on x86_64 |
126 | if (TT.getArch() == Triple::ArchType::x86_64 && TT.isGNUEnvironment()) { |
    setLibcallName(RTLIB::REM_F128, "fmodf128");
    setLibcallName(RTLIB::FMA_F128, "fmaf128");
    setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
    setLibcallName(RTLIB::CBRT_F128, "cbrtf128");
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG_FINITE_F128, "__logf128_finite");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG2_FINITE_F128, "__log2f128_finite");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::LOG10_FINITE_F128, "__log10f128_finite");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP_FINITE_F128, "__expf128_finite");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::EXP2_FINITE_F128, "__exp2f128_finite");
    setLibcallName(RTLIB::EXP10_F128, "exp10f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::POW_FINITE_F128, "__powf128_finite");
    setLibcallName(RTLIB::CEIL_F128, "ceilf128");
    setLibcallName(RTLIB::TRUNC_F128, "truncf128");
    setLibcallName(RTLIB::RINT_F128, "rintf128");
    setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
    setLibcallName(RTLIB::ROUND_F128, "roundf128");
    setLibcallName(RTLIB::ROUNDEVEN_F128, "roundevenf128");
    setLibcallName(RTLIB::FLOOR_F128, "floorf128");
    setLibcallName(RTLIB::COPYSIGN_F128, "copysignf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::LROUND_F128, "lroundf128");
    setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
    setLibcallName(RTLIB::LRINT_F128, "lrintf128");
    setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
    setLibcallName(RTLIB::LDEXP_F128, "ldexpf128");
    setLibcallName(RTLIB::FREXP_F128, "frexpf128");
163 | } |
164 | |
165 | // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf". |
166 | if (TT.isPPC()) { |
    setLibcallName(RTLIB::ADD_F128, "__addkf3");
    setLibcallName(RTLIB::SUB_F128, "__subkf3");
    setLibcallName(RTLIB::MUL_F128, "__mulkf3");
    setLibcallName(RTLIB::DIV_F128, "__divkf3");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
    setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
    setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
    setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
    setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
    setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
    setLibcallName(RTLIB::FPTOSINT_F128_I128, "__fixkfti");
    setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
    setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
    setLibcallName(RTLIB::FPTOUINT_F128_I128, "__fixunskfti");
    setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
    setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
    setLibcallName(RTLIB::SINTTOFP_I128_F128, "__floattikf");
    setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
    setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
    setLibcallName(RTLIB::UINTTOFP_I128_F128, "__floatuntikf");
    setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
    setLibcallName(RTLIB::UNE_F128, "__nekf2");
    setLibcallName(RTLIB::OGE_F128, "__gekf2");
    setLibcallName(RTLIB::OLT_F128, "__ltkf2");
    setLibcallName(RTLIB::OLE_F128, "__lekf2");
    setLibcallName(RTLIB::OGT_F128, "__gtkf2");
    setLibcallName(RTLIB::UO_F128, "__unordkf2");
195 | } |
196 | |
197 | // A few names are different on particular architectures or environments. |
198 | if (TT.isOSDarwin()) { |
199 | // For f16/f32 conversions, Darwin uses the standard naming scheme, instead |
200 | // of the gnueabi-style __gnu_*_ieee. |
201 | // FIXME: What about other targets? |
    setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");

    // Some Darwin versions have an optimized __bzero/bzero function.
    switch (TT.getArch()) {
    case Triple::x86:
    case Triple::x86_64:
      if (TT.isMacOSX() && !TT.isMacOSXVersionLT(10, 6))
        setLibcallName(RTLIB::BZERO, "__bzero");
      break;
    case Triple::aarch64:
    case Triple::aarch64_32:
      setLibcallName(RTLIB::BZERO, "bzero");
      break;
    default:
      break;
    }

    if (darwinHasSinCos(TT)) {
      setLibcallName(RTLIB::SINCOS_STRET_F32, "__sincosf_stret");
      setLibcallName(RTLIB::SINCOS_STRET_F64, "__sincos_stret");
      if (TT.isWatchABI()) {
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F32,
                              CallingConv::ARM_AAPCS_VFP);
        setLibcallCallingConv(RTLIB::SINCOS_STRET_F64,
                              CallingConv::ARM_AAPCS_VFP);
      }
    }
230 | } else { |
    setLibcallName(RTLIB::FPEXT_F16_F32, "__gnu_h2f_ieee");
    setLibcallName(RTLIB::FPROUND_F32_F16, "__gnu_f2h_ieee");
233 | } |
234 | |
235 | if (TT.isGNUEnvironment() || TT.isOSFuchsia() || |
      (TT.isAndroid() && !TT.isAndroidVersionLT(9))) {
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
    setLibcallName(RTLIB::SINCOS_F80, "sincosl");
    setLibcallName(RTLIB::SINCOS_F128, "sincosl");
    setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
242 | } |
243 | |
244 | if (TT.isPS()) { |
    setLibcallName(RTLIB::SINCOS_F32, "sincosf");
    setLibcallName(RTLIB::SINCOS_F64, "sincos");
247 | } |
248 | |
249 | if (TT.isOSOpenBSD()) { |
    setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
251 | } |
252 | |
253 | if (TT.isOSWindows() && !TT.isOSCygMing()) { |
    setLibcallName(RTLIB::LDEXP_F32, nullptr);
    setLibcallName(RTLIB::LDEXP_F80, nullptr);
    setLibcallName(RTLIB::LDEXP_F128, nullptr);
    setLibcallName(RTLIB::LDEXP_PPCF128, nullptr);

    setLibcallName(RTLIB::FREXP_F32, nullptr);
    setLibcallName(RTLIB::FREXP_F80, nullptr);
    setLibcallName(RTLIB::FREXP_F128, nullptr);
    setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
263 | } |
264 | } |
265 | |
266 | /// GetFPLibCall - Helper to return the right libcall for the given floating |
267 | /// point type, or UNKNOWN_LIBCALL if there is none. |
268 | RTLIB::Libcall RTLIB::getFPLibCall(EVT VT, |
269 | RTLIB::Libcall Call_F32, |
270 | RTLIB::Libcall Call_F64, |
271 | RTLIB::Libcall Call_F80, |
272 | RTLIB::Libcall Call_F128, |
273 | RTLIB::Libcall Call_PPCF128) { |
274 | return |
275 | VT == MVT::f32 ? Call_F32 : |
276 | VT == MVT::f64 ? Call_F64 : |
277 | VT == MVT::f80 ? Call_F80 : |
278 | VT == MVT::f128 ? Call_F128 : |
279 | VT == MVT::ppcf128 ? Call_PPCF128 : |
280 | RTLIB::UNKNOWN_LIBCALL; |
281 | } |
282 | |
283 | /// getFPEXT - Return the FPEXT_*_* value for the given types, or |
284 | /// UNKNOWN_LIBCALL if there is none. |
285 | RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { |
286 | if (OpVT == MVT::f16) { |
287 | if (RetVT == MVT::f32) |
288 | return FPEXT_F16_F32; |
289 | if (RetVT == MVT::f64) |
290 | return FPEXT_F16_F64; |
291 | if (RetVT == MVT::f80) |
292 | return FPEXT_F16_F80; |
293 | if (RetVT == MVT::f128) |
294 | return FPEXT_F16_F128; |
295 | } else if (OpVT == MVT::f32) { |
296 | if (RetVT == MVT::f64) |
297 | return FPEXT_F32_F64; |
298 | if (RetVT == MVT::f128) |
299 | return FPEXT_F32_F128; |
300 | if (RetVT == MVT::ppcf128) |
301 | return FPEXT_F32_PPCF128; |
302 | } else if (OpVT == MVT::f64) { |
303 | if (RetVT == MVT::f128) |
304 | return FPEXT_F64_F128; |
305 | else if (RetVT == MVT::ppcf128) |
306 | return FPEXT_F64_PPCF128; |
307 | } else if (OpVT == MVT::f80) { |
308 | if (RetVT == MVT::f128) |
309 | return FPEXT_F80_F128; |
310 | } else if (OpVT == MVT::bf16) { |
311 | if (RetVT == MVT::f32) |
312 | return FPEXT_BF16_F32; |
313 | } |
314 | |
315 | return UNKNOWN_LIBCALL; |
316 | } |
317 | |
318 | /// getFPROUND - Return the FPROUND_*_* value for the given types, or |
319 | /// UNKNOWN_LIBCALL if there is none. |
320 | RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { |
321 | if (RetVT == MVT::f16) { |
322 | if (OpVT == MVT::f32) |
323 | return FPROUND_F32_F16; |
324 | if (OpVT == MVT::f64) |
325 | return FPROUND_F64_F16; |
326 | if (OpVT == MVT::f80) |
327 | return FPROUND_F80_F16; |
328 | if (OpVT == MVT::f128) |
329 | return FPROUND_F128_F16; |
330 | if (OpVT == MVT::ppcf128) |
331 | return FPROUND_PPCF128_F16; |
332 | } else if (RetVT == MVT::bf16) { |
333 | if (OpVT == MVT::f32) |
334 | return FPROUND_F32_BF16; |
335 | if (OpVT == MVT::f64) |
336 | return FPROUND_F64_BF16; |
337 | } else if (RetVT == MVT::f32) { |
338 | if (OpVT == MVT::f64) |
339 | return FPROUND_F64_F32; |
340 | if (OpVT == MVT::f80) |
341 | return FPROUND_F80_F32; |
342 | if (OpVT == MVT::f128) |
343 | return FPROUND_F128_F32; |
344 | if (OpVT == MVT::ppcf128) |
345 | return FPROUND_PPCF128_F32; |
346 | } else if (RetVT == MVT::f64) { |
347 | if (OpVT == MVT::f80) |
348 | return FPROUND_F80_F64; |
349 | if (OpVT == MVT::f128) |
350 | return FPROUND_F128_F64; |
351 | if (OpVT == MVT::ppcf128) |
352 | return FPROUND_PPCF128_F64; |
353 | } else if (RetVT == MVT::f80) { |
354 | if (OpVT == MVT::f128) |
355 | return FPROUND_F128_F80; |
356 | } |
357 | |
358 | return UNKNOWN_LIBCALL; |
359 | } |
360 | |
361 | /// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or |
362 | /// UNKNOWN_LIBCALL if there is none. |
363 | RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { |
364 | if (OpVT == MVT::f16) { |
365 | if (RetVT == MVT::i32) |
366 | return FPTOSINT_F16_I32; |
367 | if (RetVT == MVT::i64) |
368 | return FPTOSINT_F16_I64; |
369 | if (RetVT == MVT::i128) |
370 | return FPTOSINT_F16_I128; |
371 | } else if (OpVT == MVT::f32) { |
372 | if (RetVT == MVT::i32) |
373 | return FPTOSINT_F32_I32; |
374 | if (RetVT == MVT::i64) |
375 | return FPTOSINT_F32_I64; |
376 | if (RetVT == MVT::i128) |
377 | return FPTOSINT_F32_I128; |
378 | } else if (OpVT == MVT::f64) { |
379 | if (RetVT == MVT::i32) |
380 | return FPTOSINT_F64_I32; |
381 | if (RetVT == MVT::i64) |
382 | return FPTOSINT_F64_I64; |
383 | if (RetVT == MVT::i128) |
384 | return FPTOSINT_F64_I128; |
385 | } else if (OpVT == MVT::f80) { |
386 | if (RetVT == MVT::i32) |
387 | return FPTOSINT_F80_I32; |
388 | if (RetVT == MVT::i64) |
389 | return FPTOSINT_F80_I64; |
390 | if (RetVT == MVT::i128) |
391 | return FPTOSINT_F80_I128; |
392 | } else if (OpVT == MVT::f128) { |
393 | if (RetVT == MVT::i32) |
394 | return FPTOSINT_F128_I32; |
395 | if (RetVT == MVT::i64) |
396 | return FPTOSINT_F128_I64; |
397 | if (RetVT == MVT::i128) |
398 | return FPTOSINT_F128_I128; |
399 | } else if (OpVT == MVT::ppcf128) { |
400 | if (RetVT == MVT::i32) |
401 | return FPTOSINT_PPCF128_I32; |
402 | if (RetVT == MVT::i64) |
403 | return FPTOSINT_PPCF128_I64; |
404 | if (RetVT == MVT::i128) |
405 | return FPTOSINT_PPCF128_I128; |
406 | } |
407 | return UNKNOWN_LIBCALL; |
408 | } |
409 | |
410 | /// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or |
411 | /// UNKNOWN_LIBCALL if there is none. |
412 | RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { |
413 | if (OpVT == MVT::f16) { |
414 | if (RetVT == MVT::i32) |
415 | return FPTOUINT_F16_I32; |
416 | if (RetVT == MVT::i64) |
417 | return FPTOUINT_F16_I64; |
418 | if (RetVT == MVT::i128) |
419 | return FPTOUINT_F16_I128; |
420 | } else if (OpVT == MVT::f32) { |
421 | if (RetVT == MVT::i32) |
422 | return FPTOUINT_F32_I32; |
423 | if (RetVT == MVT::i64) |
424 | return FPTOUINT_F32_I64; |
425 | if (RetVT == MVT::i128) |
426 | return FPTOUINT_F32_I128; |
427 | } else if (OpVT == MVT::f64) { |
428 | if (RetVT == MVT::i32) |
429 | return FPTOUINT_F64_I32; |
430 | if (RetVT == MVT::i64) |
431 | return FPTOUINT_F64_I64; |
432 | if (RetVT == MVT::i128) |
433 | return FPTOUINT_F64_I128; |
434 | } else if (OpVT == MVT::f80) { |
435 | if (RetVT == MVT::i32) |
436 | return FPTOUINT_F80_I32; |
437 | if (RetVT == MVT::i64) |
438 | return FPTOUINT_F80_I64; |
439 | if (RetVT == MVT::i128) |
440 | return FPTOUINT_F80_I128; |
441 | } else if (OpVT == MVT::f128) { |
442 | if (RetVT == MVT::i32) |
443 | return FPTOUINT_F128_I32; |
444 | if (RetVT == MVT::i64) |
445 | return FPTOUINT_F128_I64; |
446 | if (RetVT == MVT::i128) |
447 | return FPTOUINT_F128_I128; |
448 | } else if (OpVT == MVT::ppcf128) { |
449 | if (RetVT == MVT::i32) |
450 | return FPTOUINT_PPCF128_I32; |
451 | if (RetVT == MVT::i64) |
452 | return FPTOUINT_PPCF128_I64; |
453 | if (RetVT == MVT::i128) |
454 | return FPTOUINT_PPCF128_I128; |
455 | } |
456 | return UNKNOWN_LIBCALL; |
457 | } |
458 | |
459 | /// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or |
460 | /// UNKNOWN_LIBCALL if there is none. |
461 | RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { |
462 | if (OpVT == MVT::i32) { |
463 | if (RetVT == MVT::f16) |
464 | return SINTTOFP_I32_F16; |
465 | if (RetVT == MVT::f32) |
466 | return SINTTOFP_I32_F32; |
467 | if (RetVT == MVT::f64) |
468 | return SINTTOFP_I32_F64; |
469 | if (RetVT == MVT::f80) |
470 | return SINTTOFP_I32_F80; |
471 | if (RetVT == MVT::f128) |
472 | return SINTTOFP_I32_F128; |
473 | if (RetVT == MVT::ppcf128) |
474 | return SINTTOFP_I32_PPCF128; |
475 | } else if (OpVT == MVT::i64) { |
476 | if (RetVT == MVT::f16) |
477 | return SINTTOFP_I64_F16; |
478 | if (RetVT == MVT::f32) |
479 | return SINTTOFP_I64_F32; |
480 | if (RetVT == MVT::f64) |
481 | return SINTTOFP_I64_F64; |
482 | if (RetVT == MVT::f80) |
483 | return SINTTOFP_I64_F80; |
484 | if (RetVT == MVT::f128) |
485 | return SINTTOFP_I64_F128; |
486 | if (RetVT == MVT::ppcf128) |
487 | return SINTTOFP_I64_PPCF128; |
488 | } else if (OpVT == MVT::i128) { |
489 | if (RetVT == MVT::f16) |
490 | return SINTTOFP_I128_F16; |
491 | if (RetVT == MVT::f32) |
492 | return SINTTOFP_I128_F32; |
493 | if (RetVT == MVT::f64) |
494 | return SINTTOFP_I128_F64; |
495 | if (RetVT == MVT::f80) |
496 | return SINTTOFP_I128_F80; |
497 | if (RetVT == MVT::f128) |
498 | return SINTTOFP_I128_F128; |
499 | if (RetVT == MVT::ppcf128) |
500 | return SINTTOFP_I128_PPCF128; |
501 | } |
502 | return UNKNOWN_LIBCALL; |
503 | } |
504 | |
505 | /// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or |
506 | /// UNKNOWN_LIBCALL if there is none. |
507 | RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { |
508 | if (OpVT == MVT::i32) { |
509 | if (RetVT == MVT::f16) |
510 | return UINTTOFP_I32_F16; |
511 | if (RetVT == MVT::f32) |
512 | return UINTTOFP_I32_F32; |
513 | if (RetVT == MVT::f64) |
514 | return UINTTOFP_I32_F64; |
515 | if (RetVT == MVT::f80) |
516 | return UINTTOFP_I32_F80; |
517 | if (RetVT == MVT::f128) |
518 | return UINTTOFP_I32_F128; |
519 | if (RetVT == MVT::ppcf128) |
520 | return UINTTOFP_I32_PPCF128; |
521 | } else if (OpVT == MVT::i64) { |
522 | if (RetVT == MVT::f16) |
523 | return UINTTOFP_I64_F16; |
524 | if (RetVT == MVT::f32) |
525 | return UINTTOFP_I64_F32; |
526 | if (RetVT == MVT::f64) |
527 | return UINTTOFP_I64_F64; |
528 | if (RetVT == MVT::f80) |
529 | return UINTTOFP_I64_F80; |
530 | if (RetVT == MVT::f128) |
531 | return UINTTOFP_I64_F128; |
532 | if (RetVT == MVT::ppcf128) |
533 | return UINTTOFP_I64_PPCF128; |
534 | } else if (OpVT == MVT::i128) { |
535 | if (RetVT == MVT::f16) |
536 | return UINTTOFP_I128_F16; |
537 | if (RetVT == MVT::f32) |
538 | return UINTTOFP_I128_F32; |
539 | if (RetVT == MVT::f64) |
540 | return UINTTOFP_I128_F64; |
541 | if (RetVT == MVT::f80) |
542 | return UINTTOFP_I128_F80; |
543 | if (RetVT == MVT::f128) |
544 | return UINTTOFP_I128_F128; |
545 | if (RetVT == MVT::ppcf128) |
546 | return UINTTOFP_I128_PPCF128; |
547 | } |
548 | return UNKNOWN_LIBCALL; |
549 | } |
550 | |
551 | RTLIB::Libcall RTLIB::getPOWI(EVT RetVT) { |
  return getFPLibCall(RetVT, POWI_F32, POWI_F64, POWI_F80, POWI_F128,
                      POWI_PPCF128);
554 | } |
555 | |
556 | RTLIB::Libcall RTLIB::getLDEXP(EVT RetVT) { |
  return getFPLibCall(RetVT, LDEXP_F32, LDEXP_F64, LDEXP_F80, LDEXP_F128,
                      LDEXP_PPCF128);
559 | } |
560 | |
561 | RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) { |
  return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128,
                      FREXP_PPCF128);
564 | } |
565 | |
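/// Select an outlined-atomic libcall from a [size][ordering] table: the first
/// index corresponds to an access size of 1/2/4/8/16 bytes, and the second to
/// the memory ordering (relaxed, acquire, release, acq_rel; seq_cst shares
/// the acq_rel entry). Returns UNKNOWN_LIBCALL for unsupported combinations.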
566 | RTLIB::Libcall RTLIB::getOutlineAtomicHelper(const Libcall (&LC)[5][4], |
567 | AtomicOrdering Order, |
568 | uint64_t MemSize) { |
569 | unsigned ModeN, ModelN; |
570 | switch (MemSize) { |
571 | case 1: |
572 | ModeN = 0; |
573 | break; |
574 | case 2: |
575 | ModeN = 1; |
576 | break; |
577 | case 4: |
578 | ModeN = 2; |
579 | break; |
580 | case 8: |
581 | ModeN = 3; |
582 | break; |
583 | case 16: |
584 | ModeN = 4; |
585 | break; |
586 | default: |
587 | return RTLIB::UNKNOWN_LIBCALL; |
588 | } |
589 | |
590 | switch (Order) { |
591 | case AtomicOrdering::Monotonic: |
592 | ModelN = 0; |
593 | break; |
594 | case AtomicOrdering::Acquire: |
595 | ModelN = 1; |
596 | break; |
597 | case AtomicOrdering::Release: |
598 | ModelN = 2; |
599 | break; |
600 | case AtomicOrdering::AcquireRelease: |
601 | case AtomicOrdering::SequentiallyConsistent: |
602 | ModelN = 3; |
603 | break; |
604 | default: |
605 | return UNKNOWN_LIBCALL; |
606 | } |
607 | |
608 | return LC[ModeN][ModelN]; |
609 | } |
610 | |
611 | RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, |
612 | MVT VT) { |
613 | if (!VT.isScalarInteger()) |
614 | return UNKNOWN_LIBCALL; |
615 | uint64_t MemSize = VT.getScalarSizeInBits() / 8; |
616 | |
617 | #define LCALLS(A, B) \ |
618 | { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL } |
619 | #define LCALL5(A) \ |
620 | LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16) |
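  // LCALLS expands to one row of the four ordering variants for a libcall
  // family at a fixed size; LCALL5 stamps out that row for each of the five
  // supported sizes, yielding the [5][4] table getOutlineAtomicHelper expects.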
621 | switch (Opc) { |
622 | case ISD::ATOMIC_CMP_SWAP: { |
623 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_CAS)}; |
624 | return getOutlineAtomicHelper(LC, Order, MemSize); |
625 | } |
626 | case ISD::ATOMIC_SWAP: { |
627 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_SWP)}; |
628 | return getOutlineAtomicHelper(LC, Order, MemSize); |
629 | } |
630 | case ISD::ATOMIC_LOAD_ADD: { |
631 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDADD)}; |
632 | return getOutlineAtomicHelper(LC, Order, MemSize); |
633 | } |
634 | case ISD::ATOMIC_LOAD_OR: { |
635 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDSET)}; |
636 | return getOutlineAtomicHelper(LC, Order, MemSize); |
637 | } |
638 | case ISD::ATOMIC_LOAD_CLR: { |
639 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDCLR)}; |
640 | return getOutlineAtomicHelper(LC, Order, MemSize); |
641 | } |
642 | case ISD::ATOMIC_LOAD_XOR: { |
643 | const Libcall LC[5][4] = {LCALL5(OUTLINE_ATOMIC_LDEOR)}; |
644 | return getOutlineAtomicHelper(LC, Order, MemSize); |
645 | } |
646 | default: |
647 | return UNKNOWN_LIBCALL; |
648 | } |
649 | #undef LCALLS |
650 | #undef LCALL5 |
651 | } |
652 | |
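/// Return the __sync_* libcall matching the given atomic ISD opcode and
/// integer width (the _1/_2/_4/_8/_16 suffix is the access size in bytes),
/// or UNKNOWN_LIBCALL if the combination is unsupported.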
653 | RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) { |
654 | #define OP_TO_LIBCALL(Name, Enum) \ |
655 | case Name: \ |
656 | switch (VT.SimpleTy) { \ |
657 | default: \ |
658 | return UNKNOWN_LIBCALL; \ |
659 | case MVT::i8: \ |
660 | return Enum##_1; \ |
661 | case MVT::i16: \ |
662 | return Enum##_2; \ |
663 | case MVT::i32: \ |
664 | return Enum##_4; \ |
665 | case MVT::i64: \ |
666 | return Enum##_8; \ |
667 | case MVT::i128: \ |
668 | return Enum##_16; \ |
669 | } |
670 | |
671 | switch (Opc) { |
672 | OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) |
673 | OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) |
674 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) |
675 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) |
676 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) |
677 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) |
678 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) |
679 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) |
680 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) |
681 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) |
682 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) |
683 | OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) |
684 | } |
685 | |
686 | #undef OP_TO_LIBCALL |
687 | |
688 | return UNKNOWN_LIBCALL; |
689 | } |
690 | |
691 | RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
692 | switch (ElementSize) { |
693 | case 1: |
694 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_1; |
695 | case 2: |
696 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_2; |
697 | case 4: |
698 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_4; |
699 | case 8: |
700 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_8; |
701 | case 16: |
702 | return MEMCPY_ELEMENT_UNORDERED_ATOMIC_16; |
703 | default: |
704 | return UNKNOWN_LIBCALL; |
705 | } |
706 | } |
707 | |
708 | RTLIB::Libcall RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
709 | switch (ElementSize) { |
710 | case 1: |
711 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_1; |
712 | case 2: |
713 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_2; |
714 | case 4: |
715 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_4; |
716 | case 8: |
717 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_8; |
718 | case 16: |
719 | return MEMMOVE_ELEMENT_UNORDERED_ATOMIC_16; |
720 | default: |
721 | return UNKNOWN_LIBCALL; |
722 | } |
723 | } |
724 | |
725 | RTLIB::Libcall RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(uint64_t ElementSize) { |
726 | switch (ElementSize) { |
727 | case 1: |
728 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_1; |
729 | case 2: |
730 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_2; |
731 | case 4: |
732 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_4; |
733 | case 8: |
734 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_8; |
735 | case 16: |
736 | return MEMSET_ELEMENT_UNORDERED_ATOMIC_16; |
737 | default: |
738 | return UNKNOWN_LIBCALL; |
739 | } |
740 | } |
741 | |
742 | /// InitCmpLibcallCCs - Set default comparison libcall CC. |
743 | static void InitCmpLibcallCCs(ISD::CondCode *CCs) { |
  std::fill(CCs, CCs + RTLIB::UNKNOWN_LIBCALL, ISD::SETCC_INVALID);
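  // The condition code recorded here is the predicate to apply to the
  // libcall's integer result against zero; e.g. the soft-float __eq*2
  // routines return 0 exactly when the operands compare equal, so OEQ is
  // checked with SETEQ.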
745 | CCs[RTLIB::OEQ_F32] = ISD::SETEQ; |
746 | CCs[RTLIB::OEQ_F64] = ISD::SETEQ; |
747 | CCs[RTLIB::OEQ_F128] = ISD::SETEQ; |
748 | CCs[RTLIB::OEQ_PPCF128] = ISD::SETEQ; |
749 | CCs[RTLIB::UNE_F32] = ISD::SETNE; |
750 | CCs[RTLIB::UNE_F64] = ISD::SETNE; |
751 | CCs[RTLIB::UNE_F128] = ISD::SETNE; |
752 | CCs[RTLIB::UNE_PPCF128] = ISD::SETNE; |
753 | CCs[RTLIB::OGE_F32] = ISD::SETGE; |
754 | CCs[RTLIB::OGE_F64] = ISD::SETGE; |
755 | CCs[RTLIB::OGE_F128] = ISD::SETGE; |
756 | CCs[RTLIB::OGE_PPCF128] = ISD::SETGE; |
757 | CCs[RTLIB::OLT_F32] = ISD::SETLT; |
758 | CCs[RTLIB::OLT_F64] = ISD::SETLT; |
759 | CCs[RTLIB::OLT_F128] = ISD::SETLT; |
760 | CCs[RTLIB::OLT_PPCF128] = ISD::SETLT; |
761 | CCs[RTLIB::OLE_F32] = ISD::SETLE; |
762 | CCs[RTLIB::OLE_F64] = ISD::SETLE; |
763 | CCs[RTLIB::OLE_F128] = ISD::SETLE; |
764 | CCs[RTLIB::OLE_PPCF128] = ISD::SETLE; |
765 | CCs[RTLIB::OGT_F32] = ISD::SETGT; |
766 | CCs[RTLIB::OGT_F64] = ISD::SETGT; |
767 | CCs[RTLIB::OGT_F128] = ISD::SETGT; |
768 | CCs[RTLIB::OGT_PPCF128] = ISD::SETGT; |
769 | CCs[RTLIB::UO_F32] = ISD::SETNE; |
770 | CCs[RTLIB::UO_F64] = ISD::SETNE; |
771 | CCs[RTLIB::UO_F128] = ISD::SETNE; |
772 | CCs[RTLIB::UO_PPCF128] = ISD::SETNE; |
773 | } |
774 | |
775 | /// NOTE: The TargetMachine owns TLOF. |
776 | TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { |
777 | initActions(); |
778 | |
779 | // Perform these initializations only once. |
780 | MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = |
781 | MaxLoadsPerMemcmp = 8; |
782 | MaxGluedStoresPerMemcpy = 0; |
783 | MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize = |
784 | MaxStoresPerMemmoveOptSize = MaxLoadsPerMemcmpOptSize = 4; |
785 | HasMultipleConditionRegisters = false; |
786 | HasExtractBitsInsn = false; |
787 | JumpIsExpensive = JumpIsExpensiveOverride; |
788 | PredictableSelectIsExpensive = false; |
789 | EnableExtLdPromotion = false; |
790 | StackPointerRegisterToSaveRestore = 0; |
791 | BooleanContents = UndefinedBooleanContent; |
792 | BooleanFloatContents = UndefinedBooleanContent; |
793 | BooleanVectorContents = UndefinedBooleanContent; |
794 | SchedPreferenceInfo = Sched::ILP; |
795 | GatherAllAliasesMaxDepth = 18; |
796 | IsStrictFPEnabled = DisableStrictNodeMutation; |
797 | MaxBytesForAlignment = 0; |
798 | MaxAtomicSizeInBitsSupported = 0; |
799 | |
800 | // Assume that even with libcalls, no target supports wider than 128 bit |
801 | // division. |
802 | MaxDivRemBitWidthSupported = 128; |
803 | |
804 | MaxLargeFPConvertBitWidthSupported = llvm::IntegerType::MAX_INT_BITS; |
805 | |
806 | MinCmpXchgSizeInBits = 0; |
807 | SupportsUnalignedAtomics = false; |
808 | |
  std::fill(std::begin(LibcallRoutineNames), std::end(LibcallRoutineNames),
            nullptr);
810 | |
  InitLibcalls(TM.getTargetTriple());
  InitCmpLibcallCCs(CmpLibcallCCs);
813 | } |
814 | |
815 | void TargetLoweringBase::initActions() { |
816 | // All operations default to being supported. |
  memset(OpActions, 0, sizeof(OpActions));
  memset(LoadExtActions, 0, sizeof(LoadExtActions));
  memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
  memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
  memset(CondCodeActions, 0, sizeof(CondCodeActions));
  std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
  std::fill(std::begin(TargetDAGCombineArray),
            std::end(TargetDAGCombineArray), 0);
825 | |
826 | // Let extending atomic loads be unsupported by default. |
827 | for (MVT ValVT : MVT::all_valuetypes()) |
828 | for (MVT MemVT : MVT::all_valuetypes()) |
829 | setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, ValVT, MemVT, |
830 | Expand); |
831 | |
  // We're somewhat special-casing MVT::i2 and MVT::i4. Ideally we would
  // remove this, and targets would individually mark these types if not legal.
  for (ISD::NodeType NT : enum_seq(ISD::DELETED_NODE, ISD::BUILTIN_OP_END,
                                   force_iteration_on_noniterable_enum)) {
836 | for (MVT VT : {MVT::i2, MVT::i4}) |
837 | OpActions[(unsigned)VT.SimpleTy][NT] = Expand; |
838 | } |
839 | for (MVT AVT : MVT::all_valuetypes()) { |
840 | for (MVT VT : {MVT::i2, MVT::i4, MVT::v128i2, MVT::v64i4}) { |
841 | setTruncStoreAction(AVT, VT, Expand); |
842 | setLoadExtAction(ISD::EXTLOAD, AVT, VT, Expand); |
843 | setLoadExtAction(ISD::ZEXTLOAD, AVT, VT, Expand); |
844 | } |
845 | } |
846 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
847 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
848 | for (MVT VT : {MVT::i2, MVT::i4}) { |
849 | setIndexedLoadAction(IM, VT, Expand); |
850 | setIndexedStoreAction(IM, VT, Expand); |
851 | setIndexedMaskedLoadAction(IM, VT, Expand); |
852 | setIndexedMaskedStoreAction(IM, VT, Expand); |
853 | } |
854 | } |
855 | |
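  // Promote an atomic swap of a floating-point type to an atomic swap of the
  // integer type of the same width (e.g. f32 -> i32); the payload is just
  // moved as bits.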
856 | for (MVT VT : MVT::fp_valuetypes()) { |
857 | MVT IntVT = MVT::getIntegerVT(VT.getFixedSizeInBits()); |
858 | if (IntVT.isValid()) { |
859 | setOperationAction(ISD::ATOMIC_SWAP, VT, Promote); |
860 | AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT); |
861 | } |
862 | } |
863 | |
864 | // Set default actions for various operations. |
865 | for (MVT VT : MVT::all_valuetypes()) { |
866 | // Default all indexed load / store to expand. |
867 | for (unsigned IM = (unsigned)ISD::PRE_INC; |
868 | IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { |
869 | setIndexedLoadAction(IM, VT, Expand); |
870 | setIndexedStoreAction(IM, VT, Expand); |
871 | setIndexedMaskedLoadAction(IM, VT, Expand); |
872 | setIndexedMaskedStoreAction(IM, VT, Expand); |
873 | } |
874 | |
875 | // Most backends expect to see the node which just returns the value loaded. |
876 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); |
877 | |
878 | // These operations default to expand. |
879 | setOperationAction({ISD::FGETSIGN, ISD::CONCAT_VECTORS, |
880 | ISD::FMINNUM, ISD::FMAXNUM, |
881 | ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, |
882 | ISD::FMINIMUM, ISD::FMAXIMUM, |
883 | ISD::FMAD, ISD::SMIN, |
884 | ISD::SMAX, ISD::UMIN, |
885 | ISD::UMAX, ISD::ABS, |
886 | ISD::FSHL, ISD::FSHR, |
887 | ISD::SADDSAT, ISD::UADDSAT, |
888 | ISD::SSUBSAT, ISD::USUBSAT, |
889 | ISD::SSHLSAT, ISD::USHLSAT, |
890 | ISD::SMULFIX, ISD::SMULFIXSAT, |
891 | ISD::UMULFIX, ISD::UMULFIXSAT, |
892 | ISD::SDIVFIX, ISD::SDIVFIXSAT, |
893 | ISD::UDIVFIX, ISD::UDIVFIXSAT, |
894 | ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT, |
895 | ISD::IS_FPCLASS}, |
896 | VT, Expand); |
897 | |
898 | // Overflow operations default to expand |
899 | setOperationAction({ISD::SADDO, ISD::SSUBO, ISD::UADDO, ISD::USUBO, |
900 | ISD::SMULO, ISD::UMULO}, |
901 | VT, Expand); |
902 | |
903 | // Carry-using overflow operations default to expand. |
904 | setOperationAction({ISD::UADDO_CARRY, ISD::USUBO_CARRY, ISD::SETCCCARRY, |
905 | ISD::SADDO_CARRY, ISD::SSUBO_CARRY}, |
906 | VT, Expand); |
907 | |
908 | // ADDC/ADDE/SUBC/SUBE default to expand. |
909 | setOperationAction({ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}, VT, |
910 | Expand); |
911 | |
912 | // Halving adds |
913 | setOperationAction( |
914 | {ISD::AVGFLOORS, ISD::AVGFLOORU, ISD::AVGCEILS, ISD::AVGCEILU}, VT, |
915 | Expand); |
916 | |
917 | // Absolute difference |
918 | setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand); |
919 | |
920 | // These default to Expand so they will be expanded to CTLZ/CTTZ by default. |
921 | setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT, |
922 | Expand); |
923 | |
924 | setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); |
925 | |
926 | // These library functions default to expand. |
927 | setOperationAction({ISD::FROUND, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, VT, |
928 | Expand); |
929 | |
930 | // These operations default to expand for vector types. |
931 | if (VT.isVector()) |
932 | setOperationAction( |
933 | {ISD::FCOPYSIGN, ISD::SIGN_EXTEND_INREG, ISD::ANY_EXTEND_VECTOR_INREG, |
934 | ISD::SIGN_EXTEND_VECTOR_INREG, ISD::ZERO_EXTEND_VECTOR_INREG, |
935 | ISD::SPLAT_VECTOR, ISD::LRINT, ISD::LLRINT}, |
936 | VT, Expand); |
937 | |
938 | // Constrained floating-point operations default to expand. |
939 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ |
940 | setOperationAction(ISD::STRICT_##DAGN, VT, Expand); |
941 | #include "llvm/IR/ConstrainedOps.def" |
942 | |
943 | // For most targets @llvm.get.dynamic.area.offset just returns 0. |
944 | setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); |
945 | |
946 | // Vector reduction default to expand. |
947 | setOperationAction( |
948 | {ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMUL, ISD::VECREDUCE_ADD, |
949 | ISD::VECREDUCE_MUL, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, |
950 | ISD::VECREDUCE_XOR, ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN, |
951 | ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_FMAX, |
952 | ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAXIMUM, ISD::VECREDUCE_FMINIMUM, |
953 | ISD::VECREDUCE_SEQ_FADD, ISD::VECREDUCE_SEQ_FMUL}, |
954 | VT, Expand); |
955 | |
956 | // Named vector shuffles default to expand. |
957 | setOperationAction(ISD::VECTOR_SPLICE, VT, Expand); |
958 | |
959 | // VP operations default to expand. |
960 | #define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) \ |
961 | setOperationAction(ISD::SDOPC, VT, Expand); |
962 | #include "llvm/IR/VPIntrinsics.def" |
963 | |
964 | // FP environment operations default to expand. |
965 | setOperationAction(ISD::GET_FPENV, VT, Expand); |
966 | setOperationAction(ISD::SET_FPENV, VT, Expand); |
967 | setOperationAction(ISD::RESET_FPENV, VT, Expand); |
968 | } |
969 | |
970 | // Most targets ignore the @llvm.prefetch intrinsic. |
971 | setOperationAction(ISD::PREFETCH, MVT::Other, Expand); |
972 | |
973 | // Most targets also ignore the @llvm.readcyclecounter intrinsic. |
974 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); |
975 | |
976 | // Most targets also ignore the @llvm.readsteadycounter intrinsic. |
977 | setOperationAction(ISD::READSTEADYCOUNTER, MVT::i64, Expand); |
978 | |
979 | // ConstantFP nodes default to expand. Targets can either change this to |
980 | // Legal, in which case all fp constants are legal, or use isFPImmLegal() |
981 | // to optimize expansions for certain constants. |
982 | setOperationAction(ISD::ConstantFP, |
983 | {MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128}, |
984 | Expand); |
985 | |
986 | // These library functions default to expand. |
987 | setOperationAction({ISD::FCBRT, ISD::FLOG, ISD::FLOG2, ISD::FLOG10, ISD::FEXP, |
988 | ISD::FEXP2, ISD::FEXP10, ISD::FFLOOR, ISD::FNEARBYINT, |
989 | ISD::FCEIL, ISD::FRINT, ISD::FTRUNC, ISD::LROUND, |
990 | ISD::LLROUND, ISD::LRINT, ISD::LLRINT, ISD::FROUNDEVEN}, |
991 | {MVT::f32, MVT::f64, MVT::f128}, Expand); |
992 | |
993 | // Default ISD::TRAP to expand (which turns it into abort). |
994 | setOperationAction(ISD::TRAP, MVT::Other, Expand); |
995 | |
  // On most systems there is no difference between DEBUGTRAP and TRAP. The
  // "Expand" here informs the DAG legalizer to replace DEBUGTRAP with TRAP.
998 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); |
999 | |
1000 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Expand); |
1001 | |
1002 | setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Expand); |
1003 | setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Expand); |
1004 | |
1005 | for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
1006 | setOperationAction(ISD::GET_FPMODE, VT, Expand); |
1007 | setOperationAction(ISD::SET_FPMODE, VT, Expand); |
1008 | } |
1009 | setOperationAction(ISD::RESET_FPMODE, MVT::Other, Expand); |
1010 | } |
1011 | |
1012 | MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, |
1013 | EVT) const { |
  return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
1015 | } |
1016 | |
1017 | EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL, |
1018 | bool LegalTypes) const { |
  assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
1020 | if (LHSTy.isVector()) |
1021 | return LHSTy; |
1022 | MVT ShiftVT = |
1023 | LegalTypes ? getScalarShiftAmountTy(DL, LHSTy) : getPointerTy(DL); |
  // If any possible shift value won't fit in the preferred type, just use
1025 | // something safe. Assume it will be legalized when the shift is expanded. |
1026 | if (ShiftVT.getSizeInBits() < Log2_32_Ceil(LHSTy.getSizeInBits())) |
1027 | ShiftVT = MVT::i32; |
1028 | assert(ShiftVT.getSizeInBits() >= Log2_32_Ceil(LHSTy.getSizeInBits()) && |
1029 | "ShiftVT is still too small!" ); |
1030 | return ShiftVT; |
1031 | } |
1032 | |
1033 | bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { |
1034 | assert(isTypeLegal(VT)); |
1035 | switch (Op) { |
1036 | default: |
1037 | return false; |
1038 | case ISD::SDIV: |
1039 | case ISD::UDIV: |
1040 | case ISD::SREM: |
1041 | case ISD::UREM: |
1042 | return true; |
1043 | } |
1044 | } |
1045 | |
1046 | bool TargetLoweringBase::isFreeAddrSpaceCast(unsigned SrcAS, |
1047 | unsigned DestAS) const { |
1048 | return TM.isNoopAddrSpaceCast(SrcAS, DestAS); |
1049 | } |
1050 | |
1051 | void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { |
1052 | // If the command-line option was specified, ignore this request. |
1053 | if (!JumpIsExpensiveOverride.getNumOccurrences()) |
1054 | JumpIsExpensive = isExpensive; |
1055 | } |
1056 | |
1057 | TargetLoweringBase::LegalizeKind |
1058 | TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { |
1059 | // If this is a simple type, use the ComputeRegisterProp mechanism. |
1060 | if (VT.isSimple()) { |
1061 | MVT SVT = VT.getSimpleVT(); |
1062 | assert((unsigned)SVT.SimpleTy < std::size(TransformToType)); |
1063 | MVT NVT = TransformToType[SVT.SimpleTy]; |
    LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
1065 | |
1066 | assert((LA == TypeLegal || LA == TypeSoftenFloat || |
1067 | LA == TypeSoftPromoteHalf || |
1068 | (NVT.isVector() || |
1069 | ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) && |
1070 | "Promote may not follow Expand or Promote" ); |
1071 | |
1072 | if (LA == TypeSplitVector) |
1073 | return LegalizeKind(LA, EVT(SVT).getHalfNumVectorElementsVT(Context)); |
1074 | if (LA == TypeScalarizeVector) |
1075 | return LegalizeKind(LA, SVT.getVectorElementType()); |
1076 | return LegalizeKind(LA, NVT); |
1077 | } |
1078 | |
1079 | // Handle Extended Scalar Types. |
1080 | if (!VT.isVector()) { |
    assert(VT.isInteger() && "Float types must be simple");
1082 | unsigned BitSize = VT.getSizeInBits(); |
1083 | // First promote to a power-of-two size, then expand if necessary. |
    if (BitSize < 8 || !isPowerOf2_32(BitSize)) {
      EVT NVT = VT.getRoundIntegerType(Context);
      assert(NVT != VT && "Unable to round integer VT");
      LegalizeKind NextStep = getTypeConversion(Context, NVT);
1088 | // Avoid multi-step promotion. |
1089 | if (NextStep.first == TypePromoteInteger) |
1090 | return NextStep; |
1091 | // Return rounded integer type. |
1092 | return LegalizeKind(TypePromoteInteger, NVT); |
1093 | } |
1094 | |
1095 | return LegalizeKind(TypeExpandInteger, |
                        EVT::getIntegerVT(Context, VT.getSizeInBits() / 2));
1097 | } |
1098 | |
1099 | // Handle vector types. |
1100 | ElementCount NumElts = VT.getVectorElementCount(); |
1101 | EVT EltVT = VT.getVectorElementType(); |
1102 | |
1103 | // Vectors with only one element are always scalarized. |
1104 | if (NumElts.isScalar()) |
1105 | return LegalizeKind(TypeScalarizeVector, EltVT); |
1106 | |
1107 | // Try to widen vector elements until the element type is a power of two and |
1108 | // promote it to a legal type later on, for example: |
1109 | // <3 x i8> -> <4 x i8> -> <4 x i32> |
1110 | if (EltVT.isInteger()) { |
1111 | // Vectors with a number of elements that is not a power of two are always |
1112 | // widened, for example <3 x i8> -> <4 x i8>. |
1113 | if (!VT.isPow2VectorType()) { |
1114 | NumElts = NumElts.coefficientNextPowerOf2(); |
      EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts);
1116 | return LegalizeKind(TypeWidenVector, NVT); |
1117 | } |
1118 | |
1119 | // Examine the element type. |
    LegalizeKind LK = getTypeConversion(Context, EltVT);
1121 | |
1122 | // If type is to be expanded, split the vector. |
1123 | // <4 x i140> -> <2 x i140> |
1124 | if (LK.first == TypeExpandInteger) { |
1125 | if (VT.getVectorElementCount().isScalable()) |
1126 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
1127 | return LegalizeKind(TypeSplitVector, |
1128 | VT.getHalfNumVectorElementsVT(Context)); |
1129 | } |
1130 | |
1131 | // Promote the integer element types until a legal vector type is found |
1132 | // or until the element integer type is too big. If a legal type was not |
1133 | // found, fallback to the usual mechanism of widening/splitting the |
1134 | // vector. |
1135 | EVT OldEltVT = EltVT; |
1136 | while (true) { |
1137 | // Increase the bitwidth of the element to the next pow-of-two |
1138 | // (which is greater than 8 bits). |
      EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits())
                  .getRoundIntegerType(Context);
1141 | |
1142 | // Stop trying when getting a non-simple element type. |
1143 | // Note that vector elements may be greater than legal vector element |
      // types. Example: X86 XMM registers hold 64-bit elements on 32-bit
      // systems.
1146 | if (!EltVT.isSimple()) |
1147 | break; |
1148 | |
1149 | // Build a new vector type and check if it is legal. |
      MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
      // Found a legal promoted vector type.
      if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
        return LegalizeKind(TypePromoteInteger,
                            EVT::getVectorVT(Context, EltVT, NumElts));
1155 | } |
1156 | |
1157 | // Reset the type to the unexpanded type if we did not find a legal vector |
1158 | // type with a promoted vector element type. |
1159 | EltVT = OldEltVT; |
1160 | } |
1161 | |
1162 | // Try to widen the vector until a legal type is found. |
1163 | // If there is no wider legal type, split the vector. |
1164 | while (true) { |
1165 | // Round up to the next power of 2. |
1166 | NumElts = NumElts.coefficientNextPowerOf2(); |
1167 | |
1168 | // If there is no simple vector type with this many elements then there |
1169 | // cannot be a larger legal vector type. Note that this assumes that |
1170 | // there are no skipped intermediate vector types in the simple types. |
1171 | if (!EltVT.isSimple()) |
1172 | break; |
    MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
1174 | if (LargerVector == MVT()) |
1175 | break; |
1176 | |
1177 | // If this type is legal then widen the vector. |
    if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal)
1179 | return LegalizeKind(TypeWidenVector, LargerVector); |
1180 | } |
1181 | |
1182 | // Widen odd vectors to next power of two. |
1183 | if (!VT.isPow2VectorType()) { |
1184 | EVT NVT = VT.getPow2VectorType(Context); |
1185 | return LegalizeKind(TypeWidenVector, NVT); |
1186 | } |
1187 | |
  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
1189 | return LegalizeKind(TypeScalarizeScalableVector, EltVT); |
1190 | |
1191 | // Vectors with illegal element types are expanded. |
  EVT NVT = EVT::getVectorVT(Context, EltVT,
                             VT.getVectorElementCount().divideCoefficientBy(2));
1194 | return LegalizeKind(TypeSplitVector, NVT); |
1195 | } |
1196 | |
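/// Break VT down into NumIntermediates values of IntermediateVT, each of
/// which is carried in a register of type RegisterVT, and return the total
/// number of registers needed.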
1197 | static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, |
1198 | unsigned &NumIntermediates, |
1199 | MVT &RegisterVT, |
1200 | TargetLoweringBase *TLI) { |
1201 | // Figure out the right, legal destination reg to copy into. |
1202 | ElementCount EC = VT.getVectorElementCount(); |
1203 | MVT EltTy = VT.getVectorElementType(); |
1204 | |
1205 | unsigned NumVectorRegs = 1; |
1206 | |
1207 | // Scalable vectors cannot be scalarized, so splitting or widening is |
1208 | // required. |
  if (VT.isScalableVector() && !isPowerOf2_32(EC.getKnownMinValue()))
    llvm_unreachable(
        "Splitting or widening of non-power-of-2 MVTs is not implemented.");
1212 | |
1213 | // FIXME: We don't support non-power-of-2-sized vectors for now. |
1214 | // Ideally we could break down into LHS/RHS like LegalizeDAG does. |
  if (!isPowerOf2_32(EC.getKnownMinValue())) {
    // Split EC to unit size (scalable property is preserved).
    NumVectorRegs = EC.getKnownMinValue();
    EC = ElementCount::getFixed(1);
  }
1219 | } |
1220 | |
1221 | // Divide the input until we get to a supported size. This will |
  // always end up with an EC that represents a scalar or a scalable
  // scalar.
  while (EC.getKnownMinValue() > 1 &&
         !TLI->isTypeLegal(MVT::getVectorVT(EltTy, EC))) {
    EC = EC.divideCoefficientBy(2);
1227 | NumVectorRegs <<= 1; |
1228 | } |
1229 | |
1230 | NumIntermediates = NumVectorRegs; |
1231 | |
  MVT NewVT = MVT::getVectorVT(EltTy, EC);
  if (!TLI->isTypeLegal(NewVT))
1234 | NewVT = EltTy; |
1235 | IntermediateVT = NewVT; |
1236 | |
1237 | unsigned LaneSizeInBits = NewVT.getScalarSizeInBits(); |
1238 | |
1239 | // Convert sizes such as i33 to i64. |
  LaneSizeInBits = llvm::bit_ceil(LaneSizeInBits);
1241 | |
  MVT DestVT = TLI->getRegisterType(NewVT);
  RegisterVT = DestVT;
  if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
1245 | return NumVectorRegs * (LaneSizeInBits / DestVT.getScalarSizeInBits()); |
1246 | |
1247 | // Otherwise, promotion or legal types use the same number of registers as |
1248 | // the vector decimated to the appropriate level. |
1249 | return NumVectorRegs; |
1250 | } |
1251 | |
1252 | /// isLegalRC - Return true if the value types that can be represented by the |
1253 | /// specified register class are all legal. |
1254 | bool TargetLoweringBase::isLegalRC(const TargetRegisterInfo &TRI, |
1255 | const TargetRegisterClass &RC) const { |
1256 | for (const auto *I = TRI.legalclasstypes_begin(RC); *I != MVT::Other; ++I) |
1257 | if (isTypeLegal(*I)) |
1258 | return true; |
1259 | return false; |
1260 | } |
1261 | |
/// Replace/modify any TargetFrameIndex operands with a target-dependent
1263 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
1264 | MachineBasicBlock * |
1265 | TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI, |
1266 | MachineBasicBlock *MBB) const { |
1267 | MachineInstr *MI = &InitialMI; |
1268 | MachineFunction &MF = *MI->getMF(); |
1269 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
1270 | |
1271 | // We're handling multiple types of operands here: |
1272 | // PATCHPOINT MetaArgs - live-in, read only, direct |
1273 | // STATEPOINT Deopt Spill - live-through, read only, indirect |
1274 | // STATEPOINT Deopt Alloca - live-through, read only, direct |
1275 | // (We're currently conservative and mark the deopt slots read/write in |
1276 | // practice.) |
1277 | // STATEPOINT GC Spill - live-through, read/write, indirect |
1278 | // STATEPOINT GC Alloca - live-through, read/write, direct |
1279 | // The live-in vs live-through is handled already (the live through ones are |
1280 | // all stack slots), but we need to handle the different type of stackmap |
1281 | // operands and memory effects here. |
1282 | |
  if (llvm::none_of(MI->operands(),
                    [](MachineOperand &Operand) { return Operand.isFI(); }))
1285 | return MBB; |
1286 | |
  MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc());
1288 | |
1289 | // Inherit previous memory operands. |
  MIB.cloneMemRefs(*MI);
1291 | |
1292 | for (unsigned i = 0; i < MI->getNumOperands(); ++i) { |
1293 | MachineOperand &MO = MI->getOperand(i); |
1294 | if (!MO.isFI()) { |
      // Index of the Def operand this Use is tied to.
      // Since Defs come before Uses, if a Use is tied, then the
      // index of its Def must be smaller than the index of that Use.
      // Also, Defs preserve their position in the new MI.
1299 | unsigned TiedTo = i; |
1300 | if (MO.isReg() && MO.isTied()) |
        TiedTo = MI->findTiedOperandIdx(i);
1302 | MIB.add(MO); |
1303 | if (TiedTo < i) |
        MIB->tieOperands(TiedTo, MIB->getNumOperands() - 1);
1305 | continue; |
1306 | } |
1307 | |
1308 | // foldMemoryOperand builds a new MI after replacing a single FI operand |
1309 | // with the canonical set of five x86 addressing-mode operands. |
1310 | int FI = MO.getIndex(); |
1311 | |
1312 | // Add frame index operands recognized by stackmaps.cpp |
    if (MFI.isStatepointSpillSlotObjectIndex(FI)) {
1314 | // indirect-mem-ref tag, size, #FI, offset. |
      // Used for spills inserted by StatepointLowering. This codepath is not
      // used for patchpoints/stackmaps at all; for those, spilling is done
      // only via the foldMemoryOperand callback.
      assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity");
      MIB.addImm(StackMaps::IndirectMemRefOp);
      MIB.addImm(MFI.getObjectSize(FI));
      MIB.add(MO);
      MIB.addImm(0);
1323 | } else { |
1324 | // direct-mem-ref tag, #FI, offset. |
1325 | // Used by patchpoint, and direct alloca arguments to statepoints |
      MIB.addImm(StackMaps::DirectMemRefOp);
      MIB.add(MO);
      MIB.addImm(0);
1329 | } |
1330 | |
    assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!");
1332 | |
1333 | // Add a new memory operand for this FI. |
1334 | assert(MFI.getObjectOffset(FI) != -1); |
1335 | |
    // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP and
    // PATCHPOINT should be updated to do the same. (TODO)
1338 | if (MI->getOpcode() != TargetOpcode::STATEPOINT) { |
1339 | auto Flags = MachineMemOperand::MOLoad; |
      MachineMemOperand *MMO = MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, FI), Flags,
          MF.getDataLayout().getPointerSize(), MFI.getObjectAlign(FI));
      MIB->addMemOperand(MF, MMO);
1344 | } |
1345 | } |
  MBB->insert(MachineBasicBlock::iterator(MI), MIB);
1347 | MI->eraseFromParent(); |
1348 | return MBB; |
1349 | } |
1350 | |
1351 | /// findRepresentativeClass - Return the largest legal super-reg register class |
1352 | /// of the register class for the specified type and its associated "cost". |
1353 | // This function is in TargetLowering because it uses RegClassForVT which would |
1354 | // need to be moved to TargetRegisterInfo and would necessitate moving |
1355 | // isTypeLegal over as well - a massive change that would just require |
1356 | // TargetLowering having a TargetRegisterInfo class member that it would use. |
1357 | std::pair<const TargetRegisterClass *, uint8_t> |
1358 | TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, |
1359 | MVT VT) const { |
1360 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
1361 | if (!RC) |
1362 | return std::make_pair(x&: RC, y: 0); |
1363 | |
1364 | // Compute the set of all super-register classes. |
1365 | BitVector SuperRegRC(TRI->getNumRegClasses()); |
1366 | for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) |
1367 | SuperRegRC.setBitsInMask(Mask: RCI.getMask()); |
1368 | |
1369 | // Find the first legal register class with the largest spill size. |
1370 | const TargetRegisterClass *BestRC = RC; |
1371 | for (unsigned i : SuperRegRC.set_bits()) { |
1372 | const TargetRegisterClass *SuperRC = TRI->getRegClass(i); |
1373 | // We want the largest possible spill size. |
1374 | if (TRI->getSpillSize(RC: *SuperRC) <= TRI->getSpillSize(RC: *BestRC)) |
1375 | continue; |
1376 | if (!isLegalRC(TRI: *TRI, RC: *SuperRC)) |
1377 | continue; |
1378 | BestRC = SuperRC; |
1379 | } |
1380 | return std::make_pair(x&: BestRC, y: 1); |
1381 | } |
1382 | |
1383 | /// computeRegisterProperties - Once all of the register classes are added, |
1384 | /// this allows us to compute derived properties we expose. |
1385 | void TargetLoweringBase::computeRegisterProperties( |
1386 | const TargetRegisterInfo *TRI) { |
1387 | static_assert(MVT::VALUETYPE_SIZE <= MVT::MAX_ALLOWED_VALUETYPE, |
1388 | "Too many value types for ValueTypeActions to hold!" ); |
1389 | |
1390 | // Everything defaults to needing one register. |
1391 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1392 | NumRegistersForVT[i] = 1; |
1393 | RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; |
1394 | } |
1395 | // ...except isVoid, which doesn't need any registers. |
1396 | NumRegistersForVT[MVT::isVoid] = 0; |
1397 | |
1398 | // Find the largest integer register class. |
1399 | unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; |
1400 | for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) |
1401 | assert(LargestIntReg != MVT::i1 && "No integer registers defined!" ); |
1402 | |
1403 | // Every integer value type larger than this largest register takes twice as |
1404 | // many registers to represent as the previous ValueType. |
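// For example, if i64 is the largest legal integer type, then i128 gets
// NumRegistersForVT = 2, RegisterTypeForVT = i64, and TransformToType = i64,
// i.e. it is expanded into two i64 halves.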
1405 | for (unsigned ExpandedReg = LargestIntReg + 1; |
1406 | ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { |
1407 | NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; |
1408 | RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; |
1409 | TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); |
1410 | ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, |
1411 | TypeExpandInteger); |
1412 | } |
1413 | |
// Inspect all of the ValueTypes smaller than the largest integer
1415 | // register to see which ones need promotion. |
1416 | unsigned LegalIntReg = LargestIntReg; |
1417 | for (unsigned IntReg = LargestIntReg - 1; |
1418 | IntReg >= (unsigned)MVT::i1; --IntReg) { |
1419 | MVT IVT = (MVT::SimpleValueType)IntReg; |
1420 | if (isTypeLegal(IVT)) { |
1421 | LegalIntReg = IntReg; |
1422 | } else { |
1423 | RegisterTypeForVT[IntReg] = TransformToType[IntReg] = |
1424 | (MVT::SimpleValueType)LegalIntReg; |
1425 | ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); |
1426 | } |
1427 | } |
1428 | |
1429 | // ppcf128 type is really two f64's. |
1430 | if (!isTypeLegal(MVT::ppcf128)) { |
1431 | if (isTypeLegal(MVT::f64)) { |
1432 | NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; |
1433 | RegisterTypeForVT[MVT::ppcf128] = MVT::f64; |
1434 | TransformToType[MVT::ppcf128] = MVT::f64; |
1435 | ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); |
1436 | } else { |
1437 | NumRegistersForVT[MVT::ppcf128] = NumRegistersForVT[MVT::i128]; |
1438 | RegisterTypeForVT[MVT::ppcf128] = RegisterTypeForVT[MVT::i128]; |
1439 | TransformToType[MVT::ppcf128] = MVT::i128; |
1440 | ValueTypeActions.setTypeAction(MVT::ppcf128, TypeSoftenFloat); |
1441 | } |
1442 | } |
1443 | |
// Decide how to handle f128. If the target does not have native f128
// support, expand it to i128; soft float library calls will be generated.
1446 | if (!isTypeLegal(MVT::f128)) { |
1447 | NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; |
1448 | RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; |
1449 | TransformToType[MVT::f128] = MVT::i128; |
1450 | ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); |
1451 | } |
1452 | |
// Decide how to handle f80. If the target does not have native f80
// support, expand it to three i32 parts (i96 in total); soft float library
// calls will be generated.
1455 | if (!isTypeLegal(MVT::f80)) { |
1456 | NumRegistersForVT[MVT::f80] = 3*NumRegistersForVT[MVT::i32]; |
1457 | RegisterTypeForVT[MVT::f80] = RegisterTypeForVT[MVT::i32]; |
1458 | TransformToType[MVT::f80] = MVT::i32; |
1459 | ValueTypeActions.setTypeAction(MVT::f80, TypeSoftenFloat); |
1460 | } |
1461 | |
// Decide how to handle f64. If the target does not have native f64
// support, expand it to i64; soft float library calls will be generated.
1464 | if (!isTypeLegal(MVT::f64)) { |
1465 | NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; |
1466 | RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; |
1467 | TransformToType[MVT::f64] = MVT::i64; |
1468 | ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); |
1469 | } |
1470 | |
// Decide how to handle f32. If the target does not have native f32
// support, expand it to i32; soft float library calls will be generated.
1473 | if (!isTypeLegal(MVT::f32)) { |
1474 | NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; |
1475 | RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; |
1476 | TransformToType[MVT::f32] = MVT::i32; |
1477 | ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); |
1478 | } |
1479 | |
1480 | // Decide how to handle f16. If the target does not have native f16 support, |
1481 | // promote it to f32, because there are no f16 library calls (except for |
1482 | // conversions). |
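// (Roughly: with soft promotion the f16 value is carried between operations
// in its 16-bit integer form and is only converted to f32 to perform the
// arithmetic, whereas TypePromoteFloat keeps the value live as an f32
// throughout.)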
1483 | if (!isTypeLegal(MVT::f16)) { |
1484 | // Allow targets to control how we legalize half. |
1485 | bool SoftPromoteHalfType = softPromoteHalfType(); |
1486 | bool UseFPRegsForHalfType = !SoftPromoteHalfType || useFPRegsForHalfType(); |
1487 | |
1488 | if (!UseFPRegsForHalfType) { |
1489 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::i16]; |
1490 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::i16]; |
1491 | } else { |
1492 | NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; |
1493 | RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; |
1494 | } |
1495 | TransformToType[MVT::f16] = MVT::f32; |
1496 | if (SoftPromoteHalfType) { |
1497 | ValueTypeActions.setTypeAction(MVT::f16, TypeSoftPromoteHalf); |
1498 | } else { |
1499 | ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); |
1500 | } |
1501 | } |
1502 | |
1503 | // Decide how to handle bf16. If the target does not have native bf16 support, |
1504 | // promote it to f32, because there are no bf16 library calls (except for |
1505 | // converting from f32 to bf16). |
1506 | if (!isTypeLegal(MVT::bf16)) { |
1507 | NumRegistersForVT[MVT::bf16] = NumRegistersForVT[MVT::f32]; |
1508 | RegisterTypeForVT[MVT::bf16] = RegisterTypeForVT[MVT::f32]; |
1509 | TransformToType[MVT::bf16] = MVT::f32; |
1510 | ValueTypeActions.setTypeAction(MVT::bf16, TypeSoftPromoteHalf); |
1511 | } |
1512 | |
1513 | // Loop over all of the vector value types to see which need transformations. |
1514 | for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; |
1515 | i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { |
1516 | MVT VT = (MVT::SimpleValueType) i; |
1517 | if (isTypeLegal(VT)) |
1518 | continue; |
1519 | |
1520 | MVT EltVT = VT.getVectorElementType(); |
1521 | ElementCount EC = VT.getVectorElementCount(); |
1522 | bool IsLegalWiderType = false; |
1523 | bool IsScalable = VT.isScalableVector(); |
1524 | LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); |
1525 | switch (PreferredAction) { |
1526 | case TypePromoteInteger: { |
1527 | MVT::SimpleValueType EndVT = IsScalable ? |
1528 | MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE : |
1529 | MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE; |
1530 | // Try to promote the elements of integer vectors. If no legal |
1531 | // promotion was found, fall through to the widen-vector method. |
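// For example, on a target where v4i32 is the only legal vector of four
// integer elements, v4i8 and v4i16 would both be promoted to v4i32 here.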
1532 | for (unsigned nVT = i + 1; |
1533 | (MVT::SimpleValueType)nVT <= EndVT; ++nVT) { |
1534 | MVT SVT = (MVT::SimpleValueType) nVT; |
1535 | // Promote vectors of integers to vectors with the same number |
1536 | // of elements, with a wider element type. |
1537 | if (SVT.getScalarSizeInBits() > EltVT.getFixedSizeInBits() && |
1538 | SVT.getVectorElementCount() == EC && isTypeLegal(SVT)) { |
1539 | TransformToType[i] = SVT; |
1540 | RegisterTypeForVT[i] = SVT; |
1541 | NumRegistersForVT[i] = 1; |
1542 | ValueTypeActions.setTypeAction(VT, TypePromoteInteger); |
1543 | IsLegalWiderType = true; |
1544 | break; |
1545 | } |
1546 | } |
1547 | if (IsLegalWiderType) |
1548 | break; |
1549 | [[fallthrough]]; |
1550 | } |
1551 | |
1552 | case TypeWidenVector: |
1553 | if (isPowerOf2_32(EC.getKnownMinValue())) { |
1554 | // Try to widen the vector. |
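// For example, v2f32 would be widened to v4f32 on a target where only
// 128-bit vector types are legal.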
1555 | for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { |
1556 | MVT SVT = (MVT::SimpleValueType) nVT; |
1557 | if (SVT.getVectorElementType() == EltVT && |
1558 | SVT.isScalableVector() == IsScalable && |
1559 | SVT.getVectorElementCount().getKnownMinValue() > |
1560 | EC.getKnownMinValue() && |
1561 | isTypeLegal(SVT)) { |
1562 | TransformToType[i] = SVT; |
1563 | RegisterTypeForVT[i] = SVT; |
1564 | NumRegistersForVT[i] = 1; |
1565 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1566 | IsLegalWiderType = true; |
1567 | break; |
1568 | } |
1569 | } |
1570 | if (IsLegalWiderType) |
1571 | break; |
1572 | } else { |
1573 | // Only widen to the next power of 2 to keep consistency with EVT. |
1574 | MVT NVT = VT.getPow2VectorType(); |
1575 | if (isTypeLegal(NVT)) { |
1576 | TransformToType[i] = NVT; |
1577 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1578 | RegisterTypeForVT[i] = NVT; |
1579 | NumRegistersForVT[i] = 1; |
1580 | break; |
1581 | } |
1582 | } |
1583 | [[fallthrough]]; |
1584 | |
1585 | case TypeSplitVector: |
1586 | case TypeScalarizeVector: { |
1587 | MVT IntermediateVT; |
1588 | MVT RegisterVT; |
1589 | unsigned NumIntermediates; |
1590 | unsigned NumRegisters = getVectorTypeBreakdownMVT(VT, IntermediateVT, |
1591 | NumIntermediates, RegisterVT, this); |
1592 | NumRegistersForVT[i] = NumRegisters; |
1593 | assert(NumRegistersForVT[i] == NumRegisters && |
1594 | "NumRegistersForVT size cannot represent NumRegisters!" ); |
1595 | RegisterTypeForVT[i] = RegisterVT; |
1596 | |
1597 | MVT NVT = VT.getPow2VectorType(); |
1598 | if (NVT == VT) { |
1599 | // Type is already a power of 2. The default action is to split. |
1600 | TransformToType[i] = MVT::Other; |
1601 | if (PreferredAction == TypeScalarizeVector) |
1602 | ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); |
1603 | else if (PreferredAction == TypeSplitVector) |
1604 | ValueTypeActions.setTypeAction(VT, TypeSplitVector); |
1605 | else if (EC.getKnownMinValue() > 1) |
1606 | ValueTypeActions.setTypeAction(VT, TypeSplitVector); |
1607 | else |
1608 | ValueTypeActions.setTypeAction(VT, EC.isScalable() |
1609 | ? TypeScalarizeScalableVector |
1610 | : TypeScalarizeVector); |
1611 | } else { |
1612 | TransformToType[i] = NVT; |
1613 | ValueTypeActions.setTypeAction(VT, TypeWidenVector); |
1614 | } |
1615 | break; |
1616 | } |
1617 | default: |
1618 | llvm_unreachable("Unknown vector legalization action!" ); |
1619 | } |
1620 | } |
1621 | |
// Determine the 'representative' register class for each value type.
// A representative register class is the largest legal register class for a
// group of value types, i.e. one that is not a sub-register class of any
// other class in the group. For example, on i386 the representative class
// for i8, i16, and i32 is GR32; on x86_64 it is GR64.
1627 | for (unsigned i = 0; i != MVT::VALUETYPE_SIZE; ++i) { |
1628 | const TargetRegisterClass* RRC; |
1629 | uint8_t Cost; |
1630 | std::tie(args&: RRC, args&: Cost) = findRepresentativeClass(TRI, VT: (MVT::SimpleValueType)i); |
1631 | RepRegClassForVT[i] = RRC; |
1632 | RepRegClassCostForVT[i] = Cost; |
1633 | } |
1634 | } |
1635 | |
1636 | EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, |
1637 | EVT VT) const { |
1638 | assert(!VT.isVector() && "No default SetCC type for vectors!" ); |
1639 | return getPointerTy(DL).SimpleTy; |
1640 | } |
1641 | |
1642 | MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { |
1643 | return MVT::i32; // return the default value |
1644 | } |
1645 | |
1646 | /// getVectorTypeBreakdown - Vector types are broken down into some number of |
1647 | /// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 |
1648 | /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. |
1649 | /// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. |
1650 | /// |
1651 | /// This method returns the number of registers needed, and the VT for each |
1652 | /// register. It also returns the VT and quantity of the intermediate values |
1653 | /// before they are promoted/expanded. |
1654 | unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, |
1655 | EVT VT, EVT &IntermediateVT, |
1656 | unsigned &NumIntermediates, |
1657 | MVT &RegisterVT) const { |
1658 | ElementCount EltCnt = VT.getVectorElementCount(); |
1659 | |
1660 | // If there is a wider vector type with the same element type as this one, |
1661 | // or a promoted vector type that has the same number of elements which |
1662 | // are wider, then we should convert to that legal vector type. |
1663 | // This handles things like <2 x float> -> <4 x float> and |
1664 | // <4 x i1> -> <4 x i32>. |
1665 | LegalizeTypeAction TA = getTypeAction(Context, VT); |
1666 | if (!EltCnt.isScalar() && |
1667 | (TA == TypeWidenVector || TA == TypePromoteInteger)) { |
1668 | EVT RegisterEVT = getTypeToTransformTo(Context, VT); |
1669 | if (isTypeLegal(VT: RegisterEVT)) { |
1670 | IntermediateVT = RegisterEVT; |
1671 | RegisterVT = RegisterEVT.getSimpleVT(); |
1672 | NumIntermediates = 1; |
1673 | return 1; |
1674 | } |
1675 | } |
1676 | |
1677 | // Figure out the right, legal destination reg to copy into. |
1678 | EVT EltTy = VT.getVectorElementType(); |
1679 | |
1680 | unsigned NumVectorRegs = 1; |
1681 | |
// Scalable vectors cannot be scalarized, so handle the legalisation of
// these types in the same way it is done elsewhere in SelectionDAG.
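// For example, on a target whose widest legal type is nxv16i8, an illegal
// nxv32i8 would be legalized below as two nxv16i8 parts.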
1684 | if (EltCnt.isScalable()) { |
1685 | LegalizeKind LK; |
1686 | EVT PartVT = VT; |
1687 | do { |
1688 | // Iterate until we've found a legal (part) type to hold VT. |
1689 | LK = getTypeConversion(Context, VT: PartVT); |
1690 | PartVT = LK.second; |
1691 | } while (LK.first != TypeLegal); |
1692 | |
1693 | if (!PartVT.isVector()) { |
1694 | report_fatal_error( |
1695 | reason: "Don't know how to legalize this scalable vector type" ); |
1696 | } |
1697 | |
1698 | NumIntermediates = |
1699 | divideCeil(Numerator: VT.getVectorElementCount().getKnownMinValue(), |
1700 | Denominator: PartVT.getVectorElementCount().getKnownMinValue()); |
1701 | IntermediateVT = PartVT; |
1702 | RegisterVT = getRegisterType(Context, VT: IntermediateVT); |
1703 | return NumIntermediates; |
1704 | } |
1705 | |
1706 | // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally |
1707 | // we could break down into LHS/RHS like LegalizeDAG does. |
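// For example, v3f32 is handled here as three one-element pieces:
// NumVectorRegs becomes 3 and the element count drops to 1.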
1708 | if (!isPowerOf2_32(Value: EltCnt.getKnownMinValue())) { |
1709 | NumVectorRegs = EltCnt.getKnownMinValue(); |
1710 | EltCnt = ElementCount::getFixed(MinVal: 1); |
1711 | } |
1712 | |
1713 | // Divide the input until we get to a supported size. This will always |
1714 | // end with a scalar if the target doesn't support vectors. |
1715 | while (EltCnt.getKnownMinValue() > 1 && |
1716 | !isTypeLegal(VT: EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt))) { |
1717 | EltCnt = EltCnt.divideCoefficientBy(RHS: 2); |
1718 | NumVectorRegs <<= 1; |
1719 | } |
1720 | |
1721 | NumIntermediates = NumVectorRegs; |
1722 | |
1723 | EVT NewVT = EVT::getVectorVT(Context, VT: EltTy, EC: EltCnt); |
1724 | if (!isTypeLegal(VT: NewVT)) |
1725 | NewVT = EltTy; |
1726 | IntermediateVT = NewVT; |
1727 | |
1728 | MVT DestVT = getRegisterType(Context, VT: NewVT); |
1729 | RegisterVT = DestVT; |
1730 | |
1731 | if (EVT(DestVT).bitsLT(VT: NewVT)) { // Value is expanded, e.g. i64 -> i16. |
1732 | TypeSize NewVTSize = NewVT.getSizeInBits(); |
1733 | // Convert sizes such as i33 to i64. |
1734 | if (!llvm::has_single_bit<uint32_t>(Value: NewVTSize.getKnownMinValue())) |
1735 | NewVTSize = NewVTSize.coefficientNextPowerOf2(); |
1736 | return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); |
1737 | } |
1738 | |
1739 | // Otherwise, promotion or legal types use the same number of registers as |
1740 | // the vector decimated to the appropriate level. |
1741 | return NumVectorRegs; |
1742 | } |
1743 | |
1744 | bool TargetLoweringBase::isSuitableForJumpTable(const SwitchInst *SI, |
1745 | uint64_t NumCases, |
1746 | uint64_t Range, |
1747 | ProfileSummaryInfo *PSI, |
1748 | BlockFrequencyInfo *BFI) const { |
// FIXME: This function checks the maximum table size and density, but not
// the minimum size. It would be nice if the minimum size check were also
// combined into this function. Currently, the minimum size check is
// performed in findJumpTable() in SelectionDAGBuilder and
// getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
1754 | const bool OptForSize = |
1755 | SI->getParent()->getParent()->hasOptSize() || |
1756 | llvm::shouldOptimizeForSize(BB: SI->getParent(), PSI, BFI); |
1757 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
1758 | const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); |
1759 | |
1760 | // Check whether the number of cases is small enough and |
1761 | // the range is dense enough for a jump table. |
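// For example, with a minimum density of 10%, a switch covering a range of
// 100 values needs at least 10 cases before a jump table is deemed suitable.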
1762 | return (OptForSize || Range <= MaxJumpTableSize) && |
1763 | (NumCases * 100 >= Range * MinDensity); |
1764 | } |
1765 | |
1766 | MVT TargetLoweringBase::getPreferredSwitchConditionType(LLVMContext &Context, |
1767 | EVT ConditionVT) const { |
1768 | return getRegisterType(Context, VT: ConditionVT); |
1769 | } |
1770 | |
1771 | /// Get the EVTs and ArgFlags collections that represent the legalized return |
1772 | /// type of the given function. This does not require a DAG or a return value, |
1773 | /// and is suitable for use before any DAGs for the function are constructed. |
1774 | /// TODO: Move this out of TargetLowering.cpp. |
1775 | void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType, |
1776 | AttributeList attr, |
1777 | SmallVectorImpl<ISD::OutputArg> &Outs, |
1778 | const TargetLowering &TLI, const DataLayout &DL) { |
1779 | SmallVector<EVT, 4> ValueVTs; |
1780 | ComputeValueVTs(TLI, DL, Ty: ReturnType, ValueVTs); |
1781 | unsigned NumValues = ValueVTs.size(); |
1782 | if (NumValues == 0) return; |
1783 | |
1784 | for (unsigned j = 0, f = NumValues; j != f; ++j) { |
1785 | EVT VT = ValueVTs[j]; |
1786 | ISD::NodeType ExtendKind = ISD::ANY_EXTEND; |
1787 | |
1788 | if (attr.hasRetAttr(Attribute::SExt)) |
1789 | ExtendKind = ISD::SIGN_EXTEND; |
1790 | else if (attr.hasRetAttr(Attribute::ZExt)) |
1791 | ExtendKind = ISD::ZERO_EXTEND; |
1792 | |
1793 | if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) |
1794 | VT = TLI.getTypeForExtReturn(Context&: ReturnType->getContext(), VT, ExtendKind); |
1795 | |
1796 | unsigned NumParts = |
1797 | TLI.getNumRegistersForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1798 | MVT PartVT = |
1799 | TLI.getRegisterTypeForCallingConv(Context&: ReturnType->getContext(), CC, VT); |
1800 | |
// 'inreg' on the function refers to the return value.
1802 | ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); |
1803 | if (attr.hasRetAttr(Attribute::InReg)) |
1804 | Flags.setInReg(); |
1805 | |
1806 | // Propagate extension type if any |
1807 | if (attr.hasRetAttr(Attribute::SExt)) |
1808 | Flags.setSExt(); |
1809 | else if (attr.hasRetAttr(Attribute::ZExt)) |
1810 | Flags.setZExt(); |
1811 | |
1812 | for (unsigned i = 0; i < NumParts; ++i) { |
1813 | ISD::ArgFlagsTy OutFlags = Flags; |
1814 | if (NumParts > 1 && i == 0) |
1815 | OutFlags.setSplit(); |
1816 | else if (i == NumParts - 1 && i != 0) |
1817 | OutFlags.setSplitEnd(); |
1818 | |
1819 | Outs.push_back( |
1820 | Elt: ISD::OutputArg(OutFlags, PartVT, VT, /*isfixed=*/true, 0, 0)); |
1821 | } |
1822 | } |
1823 | } |
1824 | |
1825 | /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate |
1826 | /// function arguments in the caller parameter area. This is the actual |
1827 | /// alignment, not its logarithm. |
1828 | uint64_t TargetLoweringBase::getByValTypeAlignment(Type *Ty, |
1829 | const DataLayout &DL) const { |
1830 | return DL.getABITypeAlign(Ty).value(); |
1831 | } |
1832 | |
1833 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1834 | LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, |
1835 | Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
1836 | // Check if the specified alignment is sufficient based on the data layout. |
1837 | // TODO: While using the data layout works in practice, a better solution |
1838 | // would be to implement this check directly (make this a virtual function). |
// For example, the ABI alignment may change based on the software platform,
// while this function should be affected only by the hardware
// implementation.
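// For example, an i32 access with alignment 2 on a target whose data layout
// assigns i32 an ABI alignment of 4 would fall through to the misaligned
// hook below.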
1841 | Type *Ty = VT.getTypeForEVT(Context); |
1842 | if (VT.isZeroSized() || Alignment >= DL.getABITypeAlign(Ty)) { |
1843 | // Assume that an access that meets the ABI-specified alignment is fast. |
1844 | if (Fast != nullptr) |
1845 | *Fast = 1; |
1846 | return true; |
1847 | } |
1848 | |
1849 | // This is a misaligned access. |
1850 | return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast); |
1851 | } |
1852 | |
1853 | bool TargetLoweringBase::allowsMemoryAccessForAlignment( |
1854 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1855 | const MachineMemOperand &MMO, unsigned *Fast) const { |
1856 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), |
1857 | Alignment: MMO.getAlign(), Flags: MMO.getFlags(), Fast); |
1858 | } |
1859 | |
1860 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1861 | const DataLayout &DL, EVT VT, |
1862 | unsigned AddrSpace, Align Alignment, |
1863 | MachineMemOperand::Flags Flags, |
1864 | unsigned *Fast) const { |
1865 | return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment, |
1866 | Flags, Fast); |
1867 | } |
1868 | |
1869 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1870 | const DataLayout &DL, EVT VT, |
1871 | const MachineMemOperand &MMO, |
1872 | unsigned *Fast) const { |
1873 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1874 | Flags: MMO.getFlags(), Fast); |
1875 | } |
1876 | |
1877 | bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, |
1878 | const DataLayout &DL, LLT Ty, |
1879 | const MachineMemOperand &MMO, |
1880 | unsigned *Fast) const { |
1881 | EVT VT = getApproximateEVTForLLT(Ty, DL, Ctx&: Context); |
1882 | return allowsMemoryAccess(Context, DL, VT, AddrSpace: MMO.getAddrSpace(), Alignment: MMO.getAlign(), |
1883 | Flags: MMO.getFlags(), Fast); |
1884 | } |
1885 | |
1886 | //===----------------------------------------------------------------------===// |
1887 | // TargetTransformInfo Helpers |
1888 | //===----------------------------------------------------------------------===// |
1889 | |
1890 | int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { |
1891 | enum InstructionOpcodes { |
1892 | #define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, |
1893 | #define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM |
1894 | #include "llvm/IR/Instruction.def" |
1895 | }; |
1896 | switch (static_cast<InstructionOpcodes>(Opcode)) { |
1897 | case Ret: return 0; |
1898 | case Br: return 0; |
1899 | case Switch: return 0; |
1900 | case IndirectBr: return 0; |
1901 | case Invoke: return 0; |
1902 | case CallBr: return 0; |
1903 | case Resume: return 0; |
1904 | case Unreachable: return 0; |
1905 | case CleanupRet: return 0; |
1906 | case CatchRet: return 0; |
1907 | case CatchPad: return 0; |
1908 | case CatchSwitch: return 0; |
1909 | case CleanupPad: return 0; |
1910 | case FNeg: return ISD::FNEG; |
1911 | case Add: return ISD::ADD; |
1912 | case FAdd: return ISD::FADD; |
1913 | case Sub: return ISD::SUB; |
1914 | case FSub: return ISD::FSUB; |
1915 | case Mul: return ISD::MUL; |
1916 | case FMul: return ISD::FMUL; |
1917 | case UDiv: return ISD::UDIV; |
1918 | case SDiv: return ISD::SDIV; |
1919 | case FDiv: return ISD::FDIV; |
1920 | case URem: return ISD::UREM; |
1921 | case SRem: return ISD::SREM; |
1922 | case FRem: return ISD::FREM; |
1923 | case Shl: return ISD::SHL; |
1924 | case LShr: return ISD::SRL; |
1925 | case AShr: return ISD::SRA; |
1926 | case And: return ISD::AND; |
1927 | case Or: return ISD::OR; |
1928 | case Xor: return ISD::XOR; |
1929 | case Alloca: return 0; |
1930 | case Load: return ISD::LOAD; |
1931 | case Store: return ISD::STORE; |
1932 | case GetElementPtr: return 0; |
1933 | case Fence: return 0; |
1934 | case AtomicCmpXchg: return 0; |
1935 | case AtomicRMW: return 0; |
1936 | case Trunc: return ISD::TRUNCATE; |
1937 | case ZExt: return ISD::ZERO_EXTEND; |
1938 | case SExt: return ISD::SIGN_EXTEND; |
1939 | case FPToUI: return ISD::FP_TO_UINT; |
1940 | case FPToSI: return ISD::FP_TO_SINT; |
1941 | case UIToFP: return ISD::UINT_TO_FP; |
1942 | case SIToFP: return ISD::SINT_TO_FP; |
1943 | case FPTrunc: return ISD::FP_ROUND; |
1944 | case FPExt: return ISD::FP_EXTEND; |
1945 | case PtrToInt: return ISD::BITCAST; |
1946 | case IntToPtr: return ISD::BITCAST; |
1947 | case BitCast: return ISD::BITCAST; |
1948 | case AddrSpaceCast: return ISD::ADDRSPACECAST; |
1949 | case ICmp: return ISD::SETCC; |
1950 | case FCmp: return ISD::SETCC; |
1951 | case PHI: return 0; |
1952 | case Call: return 0; |
1953 | case Select: return ISD::SELECT; |
1954 | case UserOp1: return 0; |
1955 | case UserOp2: return 0; |
1956 | case VAArg: return 0; |
1957 | case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; |
1958 | case InsertElement: return ISD::INSERT_VECTOR_ELT; |
1959 | case ShuffleVector: return ISD::VECTOR_SHUFFLE; |
1960 | case ExtractValue: return ISD::MERGE_VALUES; |
1961 | case InsertValue: return ISD::MERGE_VALUES; |
1962 | case LandingPad: return 0; |
1963 | case Freeze: return ISD::FREEZE; |
1964 | } |
1965 | |
1966 | llvm_unreachable("Unknown instruction type encountered!" ); |
1967 | } |
1968 | |
1969 | Value * |
1970 | TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilderBase &IRB, |
1971 | bool UseTLS) const { |
1972 | // compiler-rt provides a variable with a magic name. Targets that do not |
1973 | // link with compiler-rt may also provide such a variable. |
1974 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
1975 | const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr" ; |
1976 | auto UnsafeStackPtr = |
1977 | dyn_cast_or_null<GlobalVariable>(Val: M->getNamedValue(Name: UnsafeStackPtrVar)); |
1978 | |
1979 | Type *StackPtrTy = PointerType::getUnqual(C&: M->getContext()); |
1980 | |
1981 | if (!UnsafeStackPtr) { |
1982 | auto TLSModel = UseTLS ? |
1983 | GlobalValue::InitialExecTLSModel : |
1984 | GlobalValue::NotThreadLocal; |
1985 | // The global variable is not defined yet, define it ourselves. |
1986 | // We use the initial-exec TLS model because we do not support the |
1987 | // variable living anywhere other than in the main executable. |
1988 | UnsafeStackPtr = new GlobalVariable( |
1989 | *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr, |
1990 | UnsafeStackPtrVar, nullptr, TLSModel); |
1991 | } else { |
1992 | // The variable exists, check its type and attributes. |
1993 | if (UnsafeStackPtr->getValueType() != StackPtrTy) |
1994 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must have void* type" ); |
1995 | if (UseTLS != UnsafeStackPtr->isThreadLocal()) |
1996 | report_fatal_error(reason: Twine(UnsafeStackPtrVar) + " must " + |
1997 | (UseTLS ? "" : "not " ) + "be thread-local" ); |
1998 | } |
1999 | return UnsafeStackPtr; |
2000 | } |
2001 | |
2002 | Value * |
2003 | TargetLoweringBase::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
2004 | if (!TM.getTargetTriple().isAndroid()) |
2005 | return getDefaultSafeStackPointerLocation(IRB, UseTLS: true); |
2006 | |
2007 | // Android provides a libc function to retrieve the address of the current |
2008 | // thread's unsafe stack pointer. |
2009 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
2010 | auto *PtrTy = PointerType::getUnqual(C&: M->getContext()); |
2011 | FunctionCallee Fn = |
2012 | M->getOrInsertFunction(Name: "__safestack_pointer_address" , RetTy: PtrTy); |
2013 | return IRB.CreateCall(Callee: Fn); |
2014 | } |
2015 | |
2016 | //===----------------------------------------------------------------------===// |
2017 | // Loop Strength Reduction hooks |
2018 | //===----------------------------------------------------------------------===// |
2019 | |
2020 | /// isLegalAddressingMode - Return true if the addressing mode represented |
2021 | /// by AM is legal for this target, for a load/store of the specified type. |
2022 | bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, |
2023 | const AddrMode &AM, Type *Ty, |
2024 | unsigned AS, Instruction *I) const { |
// The default implementation supports only a conservative RISC-style set of
// addressing modes: r+r and r+i.
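// Concretely, the checks below accept r, i (a roughly 16-bit signed
// immediate), r+i, r+r, and 2*r, and reject global bases, scalable offsets,
// and any other scaled form.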
2027 | |
2028 | // Scalable offsets not supported |
2029 | if (AM.ScalableOffset) |
2030 | return false; |
2031 | |
2032 | // Allows a sign-extended 16-bit immediate field. |
2033 | if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) |
2034 | return false; |
2035 | |
2036 | // No global is ever allowed as a base. |
2037 | if (AM.BaseGV) |
2038 | return false; |
2039 | |
2040 | // Only support r+r, |
2041 | switch (AM.Scale) { |
2042 | case 0: // "r+i" or just "i", depending on HasBaseReg. |
2043 | break; |
2044 | case 1: |
2045 | if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. |
2046 | return false; |
2047 | // Otherwise we have r+r or r+i. |
2048 | break; |
2049 | case 2: |
2050 | if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. |
2051 | return false; |
2052 | // Allow 2*r as r+r. |
2053 | break; |
2054 | default: // Don't allow n * r |
2055 | return false; |
2056 | } |
2057 | |
2058 | return true; |
2059 | } |
2060 | |
2061 | //===----------------------------------------------------------------------===// |
2062 | // Stack Protector |
2063 | //===----------------------------------------------------------------------===// |
2064 | |
// For OpenBSD return its special guard variable. Otherwise return nullptr,
// so that SelectionDAG handles SSP.
2067 | Value *TargetLoweringBase::getIRStackGuard(IRBuilderBase &IRB) const { |
2068 | if (getTargetMachine().getTargetTriple().isOSOpenBSD()) { |
2069 | Module &M = *IRB.GetInsertBlock()->getParent()->getParent(); |
2070 | PointerType *PtrTy = PointerType::getUnqual(C&: M.getContext()); |
2071 | Constant *C = M.getOrInsertGlobal(Name: "__guard_local" , Ty: PtrTy); |
2072 | if (GlobalVariable *G = dyn_cast_or_null<GlobalVariable>(Val: C)) |
2073 | G->setVisibility(GlobalValue::HiddenVisibility); |
2074 | return C; |
2075 | } |
2076 | return nullptr; |
2077 | } |
2078 | |
2079 | // Currently only support "standard" __stack_chk_guard. |
2080 | // TODO: add LOAD_STACK_GUARD support. |
2081 | void TargetLoweringBase::insertSSPDeclarations(Module &M) const { |
2082 | if (!M.getNamedValue(Name: "__stack_chk_guard" )) { |
2083 | auto *GV = new GlobalVariable(M, PointerType::getUnqual(C&: M.getContext()), |
2084 | false, GlobalVariable::ExternalLinkage, |
2085 | nullptr, "__stack_chk_guard" ); |
2086 | |
// FreeBSD has "__stack_chk_guard" defined externally in libc.so
2088 | if (M.getDirectAccessExternalData() && |
2089 | !TM.getTargetTriple().isWindowsGNUEnvironment() && |
2090 | !(TM.getTargetTriple().isPPC64() && |
2091 | TM.getTargetTriple().isOSFreeBSD()) && |
2092 | (!TM.getTargetTriple().isOSDarwin() || |
2093 | TM.getRelocationModel() == Reloc::Static)) |
2094 | GV->setDSOLocal(true); |
2095 | } |
2096 | } |
2097 | |
2098 | // Currently only support "standard" __stack_chk_guard. |
2099 | // TODO: add LOAD_STACK_GUARD support. |
2100 | Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const { |
2101 | return M.getNamedValue(Name: "__stack_chk_guard" ); |
2102 | } |
2103 | |
2104 | Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const { |
2105 | return nullptr; |
2106 | } |
2107 | |
2108 | unsigned TargetLoweringBase::getMinimumJumpTableEntries() const { |
2109 | return MinimumJumpTableEntries; |
2110 | } |
2111 | |
2112 | void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) { |
2113 | MinimumJumpTableEntries = Val; |
2114 | } |
2115 | |
2116 | unsigned TargetLoweringBase::getMinimumJumpTableDensity(bool OptForSize) const { |
2117 | return OptForSize ? OptsizeJumpTableDensity : JumpTableDensity; |
2118 | } |
2119 | |
2120 | unsigned TargetLoweringBase::getMaximumJumpTableSize() const { |
2121 | return MaximumJumpTableSize; |
2122 | } |
2123 | |
2124 | void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { |
2125 | MaximumJumpTableSize = Val; |
2126 | } |
2127 | |
2128 | bool TargetLoweringBase::isJumpTableRelative() const { |
2129 | return getTargetMachine().isPositionIndependent(); |
2130 | } |
2131 | |
2132 | Align TargetLoweringBase::getPrefLoopAlignment(MachineLoop *ML) const { |
2133 | if (TM.Options.LoopAlignment) |
2134 | return Align(TM.Options.LoopAlignment); |
2135 | return PrefLoopAlignment; |
2136 | } |
2137 | |
2138 | unsigned TargetLoweringBase::getMaxPermittedBytesForAlignment( |
2139 | MachineBasicBlock *MBB) const { |
2140 | return MaxBytesForAlignment; |
2141 | } |
2142 | |
2143 | //===----------------------------------------------------------------------===// |
2144 | // Reciprocal Estimates |
2145 | //===----------------------------------------------------------------------===// |
2146 | |
2147 | /// Get the reciprocal estimate attribute string for a function that will |
2148 | /// override the target defaults. |
2149 | static StringRef getRecipEstimateForFunc(MachineFunction &MF) { |
2150 | const Function &F = MF.getFunction(); |
2151 | return F.getFnAttribute(Kind: "reciprocal-estimates" ).getValueAsString(); |
2152 | } |
2153 | |
2154 | /// Construct a string for the given reciprocal operation of the given type. |
2155 | /// This string should match the corresponding option to the front-end's |
2156 | /// "-mrecip" flag assuming those strings have been passed through in an |
2157 | /// attribute string. For example, "vec-divf" for a division of a vXf32. |
2158 | static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { |
2159 | std::string Name = VT.isVector() ? "vec-" : "" ; |
2160 | |
2161 | Name += IsSqrt ? "sqrt" : "div" ; |
2162 | |
2163 | // TODO: Handle other float types? |
2164 | if (VT.getScalarType() == MVT::f64) { |
2165 | Name += "d" ; |
2166 | } else if (VT.getScalarType() == MVT::f16) { |
2167 | Name += "h" ; |
2168 | } else { |
2169 | assert(VT.getScalarType() == MVT::f32 && |
2170 | "Unexpected FP type for reciprocal estimate" ); |
2171 | Name += "f" ; |
2172 | } |
2173 | |
2174 | return Name; |
2175 | } |
2176 | |
2177 | /// Return the character position and value (a single numeric character) of a |
2178 | /// customized refinement operation in the input string if it exists. Return |
2179 | /// false if there is no customized refinement step count. |
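/// For example, given "sqrtf:2", Position is set to 5 (the position of the
/// ':') and Value to 2.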
2180 | static bool parseRefinementStep(StringRef In, size_t &Position, |
2181 | uint8_t &Value) { |
2182 | const char RefStepToken = ':'; |
2183 | Position = In.find(C: RefStepToken); |
2184 | if (Position == StringRef::npos) |
2185 | return false; |
2186 | |
2187 | StringRef RefStepString = In.substr(Start: Position + 1); |
2188 | // Allow exactly one numeric character for the additional refinement |
2189 | // step parameter. |
2190 | if (RefStepString.size() == 1) { |
2191 | char RefStepChar = RefStepString[0]; |
2192 | if (isDigit(C: RefStepChar)) { |
2193 | Value = RefStepChar - '0'; |
2194 | return true; |
2195 | } |
2196 | } |
2197 | report_fatal_error(reason: "Invalid refinement step for -recip." ); |
2198 | } |
2199 | |
2200 | /// For the input attribute string, return one of the ReciprocalEstimate enum |
2201 | /// status values (enabled, disabled, or not specified) for this operation on |
2202 | /// the specified data type. |
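/// For example, "all:2" enables every estimate (with two refinement steps),
/// "!divf" disables only scalar f32 division, and "vec-sqrtf" enables the
/// square root estimate for f32 vectors.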
2203 | static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { |
2204 | if (Override.empty()) |
2205 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2206 | |
2207 | SmallVector<StringRef, 4> OverrideVector; |
2208 | Override.split(A&: OverrideVector, Separator: ','); |
2209 | unsigned NumArgs = OverrideVector.size(); |
2210 | |
2211 | // Check if "all", "none", or "default" was specified. |
2212 | if (NumArgs == 1) { |
2213 | // Look for an optional setting of the number of refinement steps needed |
2214 | // for this type of reciprocal operation. |
2215 | size_t RefPos; |
2216 | uint8_t RefSteps; |
2217 | if (parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) { |
2218 | // Split the string for further processing. |
2219 | Override = Override.substr(Start: 0, N: RefPos); |
2220 | } |
2221 | |
2222 | // All reciprocal types are enabled. |
2223 | if (Override == "all" ) |
2224 | return TargetLoweringBase::ReciprocalEstimate::Enabled; |
2225 | |
2226 | // All reciprocal types are disabled. |
2227 | if (Override == "none" ) |
2228 | return TargetLoweringBase::ReciprocalEstimate::Disabled; |
2229 | |
2230 | // Target defaults for enablement are used. |
2231 | if (Override == "default" ) |
2232 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2233 | } |
2234 | |
2235 | // The attribute string may omit the size suffix ('f'/'d'). |
2236 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2237 | std::string VTNameNoSize = VTName; |
2238 | VTNameNoSize.pop_back(); |
2239 | static const char DisabledPrefix = '!'; |
2240 | |
2241 | for (StringRef RecipType : OverrideVector) { |
2242 | size_t RefPos; |
2243 | uint8_t RefSteps; |
2244 | if (parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2245 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2246 | |
2247 | // Ignore the disablement token for string matching. |
2248 | bool IsDisabled = RecipType[0] == DisabledPrefix; |
2249 | if (IsDisabled) |
2250 | RecipType = RecipType.substr(Start: 1); |
2251 | |
2252 | if (RecipType.equals(RHS: VTName) || RecipType.equals(RHS: VTNameNoSize)) |
2253 | return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled |
2254 | : TargetLoweringBase::ReciprocalEstimate::Enabled; |
2255 | } |
2256 | |
2257 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2258 | } |
2259 | |
2260 | /// For the input attribute string, return the customized refinement step count |
2261 | /// for this operation on the specified data type. If the step count does not |
2262 | /// exist, return the ReciprocalEstimate enum value for unspecified. |
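/// For example, with an attribute string of "sqrtd:3", a query for the f64
/// square root refinement step count returns 3.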
2263 | static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { |
2264 | if (Override.empty()) |
2265 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2266 | |
2267 | SmallVector<StringRef, 4> OverrideVector; |
2268 | Override.split(A&: OverrideVector, Separator: ','); |
2269 | unsigned NumArgs = OverrideVector.size(); |
2270 | |
2271 | // Check if "all", "default", or "none" was specified. |
2272 | if (NumArgs == 1) { |
2273 | // Look for an optional setting of the number of refinement steps needed |
2274 | // for this type of reciprocal operation. |
2275 | size_t RefPos; |
2276 | uint8_t RefSteps; |
2277 | if (!parseRefinementStep(In: Override, Position&: RefPos, Value&: RefSteps)) |
2278 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2279 | |
2280 | // Split the string for further processing. |
2281 | Override = Override.substr(Start: 0, N: RefPos); |
    assert(Override != "none" &&
           "Disabled reciprocals, but specified refinement steps?");
2284 | |
2285 | // If this is a general override, return the specified number of steps. |
2286 | if (Override == "all" || Override == "default" ) |
2287 | return RefSteps; |
2288 | } |
2289 | |
2290 | // The attribute string may omit the size suffix ('f'/'d'). |
2291 | std::string VTName = getReciprocalOpName(IsSqrt, VT); |
2292 | std::string VTNameNoSize = VTName; |
2293 | VTNameNoSize.pop_back(); |
2294 | |
2295 | for (StringRef RecipType : OverrideVector) { |
2296 | size_t RefPos; |
2297 | uint8_t RefSteps; |
2298 | if (!parseRefinementStep(In: RecipType, Position&: RefPos, Value&: RefSteps)) |
2299 | continue; |
2300 | |
2301 | RecipType = RecipType.substr(Start: 0, N: RefPos); |
2302 | if (RecipType.equals(RHS: VTName) || RecipType.equals(RHS: VTNameNoSize)) |
2303 | return RefSteps; |
2304 | } |
2305 | |
2306 | return TargetLoweringBase::ReciprocalEstimate::Unspecified; |
2307 | } |
2308 | |
2309 | int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT, |
2310 | MachineFunction &MF) const { |
2311 | return getOpEnabled(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2312 | } |
2313 | |
2314 | int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT, |
2315 | MachineFunction &MF) const { |
2316 | return getOpEnabled(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2317 | } |
2318 | |
2319 | int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, |
2320 | MachineFunction &MF) const { |
2321 | return getOpRefinementSteps(IsSqrt: true, VT, Override: getRecipEstimateForFunc(MF)); |
2322 | } |
2323 | |
2324 | int TargetLoweringBase::getDivRefinementSteps(EVT VT, |
2325 | MachineFunction &MF) const { |
2326 | return getOpRefinementSteps(IsSqrt: false, VT, Override: getRecipEstimateForFunc(MF)); |
2327 | } |
2328 | |
2329 | bool TargetLoweringBase::isLoadBitCastBeneficial( |
2330 | EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG, |
2331 | const MachineMemOperand &MMO) const { |
2332 | // Single-element vectors are scalarized, so we should generally avoid having |
2333 | // any memory operations on such types, as they would get scalarized too. |
2334 | if (LoadVT.isFixedLengthVector() && BitcastVT.isFixedLengthVector() && |
2335 | BitcastVT.getVectorNumElements() == 1) |
2336 | return false; |
2337 | |
// Don't do this if we could do an indexed load on the original type, but
// not on the new one.
2340 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
2341 | return true; |
2342 | |
2343 | MVT LoadMVT = LoadVT.getSimpleVT(); |
2344 | |
2345 | // Don't bother doing this if it's just going to be promoted again later, as |
2346 | // doing so might interfere with other combines. |
2347 | if (getOperationAction(Op: ISD::LOAD, VT: LoadMVT) == Promote && |
2348 | getTypeToPromoteTo(Op: ISD::LOAD, VT: LoadMVT) == BitcastVT.getSimpleVT()) |
2349 | return false; |
2350 | |
2351 | unsigned Fast = 0; |
2352 | return allowsMemoryAccess(Context&: *DAG.getContext(), DL: DAG.getDataLayout(), VT: BitcastVT, |
2353 | MMO, Fast: &Fast) && |
2354 | Fast; |
2355 | } |
2356 | |
2357 | void TargetLoweringBase::finalizeLowering(MachineFunction &MF) const { |
2358 | MF.getRegInfo().freezeReservedRegs(); |
2359 | } |
2360 | |
2361 | MachineMemOperand::Flags TargetLoweringBase::getLoadMemOperandFlags( |
2362 | const LoadInst &LI, const DataLayout &DL, AssumptionCache *AC, |
2363 | const TargetLibraryInfo *LibInfo) const { |
2364 | MachineMemOperand::Flags Flags = MachineMemOperand::MOLoad; |
2365 | if (LI.isVolatile()) |
2366 | Flags |= MachineMemOperand::MOVolatile; |
2367 | |
2368 | if (LI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2369 | Flags |= MachineMemOperand::MONonTemporal; |
2370 | |
2371 | if (LI.hasMetadata(KindID: LLVMContext::MD_invariant_load)) |
2372 | Flags |= MachineMemOperand::MOInvariant; |
2373 | |
2374 | if (isDereferenceableAndAlignedPointer(V: LI.getPointerOperand(), Ty: LI.getType(), |
2375 | Alignment: LI.getAlign(), DL, CtxI: &LI, AC, |
2376 | /*DT=*/nullptr, TLI: LibInfo)) |
2377 | Flags |= MachineMemOperand::MODereferenceable; |
2378 | |
2379 | Flags |= getTargetMMOFlags(I: LI); |
2380 | return Flags; |
2381 | } |
2382 | |
2383 | MachineMemOperand::Flags |
2384 | TargetLoweringBase::getStoreMemOperandFlags(const StoreInst &SI, |
2385 | const DataLayout &DL) const { |
2386 | MachineMemOperand::Flags Flags = MachineMemOperand::MOStore; |
2387 | |
2388 | if (SI.isVolatile()) |
2389 | Flags |= MachineMemOperand::MOVolatile; |
2390 | |
2391 | if (SI.hasMetadata(KindID: LLVMContext::MD_nontemporal)) |
2392 | Flags |= MachineMemOperand::MONonTemporal; |
2393 | |
2394 | // FIXME: Not preserving dereferenceable |
2395 | Flags |= getTargetMMOFlags(I: SI); |
2396 | return Flags; |
2397 | } |
2398 | |
2399 | MachineMemOperand::Flags |
2400 | TargetLoweringBase::getAtomicMemOperandFlags(const Instruction &AI, |
2401 | const DataLayout &DL) const { |
2402 | auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; |
2403 | |
2404 | if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Val: &AI)) { |
2405 | if (RMW->isVolatile()) |
2406 | Flags |= MachineMemOperand::MOVolatile; |
2407 | } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Val: &AI)) { |
2408 | if (CmpX->isVolatile()) |
2409 | Flags |= MachineMemOperand::MOVolatile; |
2410 | } else |
2411 | llvm_unreachable("not an atomic instruction" ); |
2412 | |
2413 | // FIXME: Not preserving dereferenceable |
2414 | Flags |= getTargetMMOFlags(I: AI); |
2415 | return Flags; |
2416 | } |
2417 | |
2418 | Instruction *TargetLoweringBase::emitLeadingFence(IRBuilderBase &Builder, |
2419 | Instruction *Inst, |
2420 | AtomicOrdering Ord) const { |
2421 | if (isReleaseOrStronger(AO: Ord) && Inst->hasAtomicStore()) |
2422 | return Builder.CreateFence(Ordering: Ord); |
2423 | else |
2424 | return nullptr; |
2425 | } |
2426 | |
2427 | Instruction *TargetLoweringBase::emitTrailingFence(IRBuilderBase &Builder, |
2428 | Instruction *Inst, |
2429 | AtomicOrdering Ord) const { |
2430 | if (isAcquireOrStronger(AO: Ord)) |
2431 | return Builder.CreateFence(Ordering: Ord); |
2432 | else |
2433 | return nullptr; |
2434 | } |
2435 | |
2436 | //===----------------------------------------------------------------------===// |
2437 | // GlobalISel Hooks |
2438 | //===----------------------------------------------------------------------===// |
2439 | |
2440 | bool TargetLoweringBase::shouldLocalize(const MachineInstr &MI, |
2441 | const TargetTransformInfo *TTI) const { |
2442 | auto &MF = *MI.getMF(); |
2443 | auto &MRI = MF.getRegInfo(); |
// Assuming a spill and reload of a value has a cost of 1 instruction each,
// this helper function computes the maximum number of uses we should consider
// for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
// break even in terms of code size when the original MI has 2 users vs.
// choosing to potentially spill. Any more than 2 users and we have a net code
// size increase. This doesn't take into account register pressure though.
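// The resulting policy: a remat cost of 1 localizes for any number of users,
// a cost of 2 for at most 2 users, and anything more expensive for a single
// user only.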
2450 | auto maxUses = [](unsigned RematCost) { |
2451 | // A cost of 1 means remats are basically free. |
2452 | if (RematCost == 1) |
2453 | return std::numeric_limits<unsigned>::max(); |
2454 | if (RematCost == 2) |
2455 | return 2U; |
2456 | |
// Remat is too expensive; only sink if there's one user.
2458 | if (RematCost > 2) |
2459 | return 1U; |
2460 | llvm_unreachable("Unexpected remat cost" ); |
2461 | }; |
2462 | |
2463 | switch (MI.getOpcode()) { |
2464 | default: |
2465 | return false; |
// Constant-like instructions should be close to their users.
// We don't want long live-ranges for them.
2468 | case TargetOpcode::G_CONSTANT: |
2469 | case TargetOpcode::G_FCONSTANT: |
2470 | case TargetOpcode::G_FRAME_INDEX: |
2471 | case TargetOpcode::G_INTTOPTR: |
2472 | return true; |
2473 | case TargetOpcode::G_GLOBAL_VALUE: { |
2474 | unsigned RematCost = TTI->getGISelRematGlobalCost(); |
2475 | Register Reg = MI.getOperand(i: 0).getReg(); |
2476 | unsigned MaxUses = maxUses(RematCost); |
2477 | if (MaxUses == UINT_MAX) |
2478 | return true; // Remats are "free" so always localize. |
2479 | return MRI.hasAtMostUserInstrs(Reg, MaxUsers: MaxUses); |
2480 | } |
2481 | } |
2482 | } |
2483 | |