1 | //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the SystemZTargetLowering class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "SystemZISelLowering.h" |
14 | #include "SystemZCallingConv.h" |
15 | #include "SystemZConstantPoolValue.h" |
16 | #include "SystemZMachineFunctionInfo.h" |
17 | #include "SystemZTargetMachine.h" |
18 | #include "llvm/CodeGen/CallingConvLower.h" |
19 | #include "llvm/CodeGen/ISDOpcodes.h" |
20 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
21 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
22 | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" |
23 | #include "llvm/IR/IntrinsicInst.h" |
24 | #include "llvm/IR/Intrinsics.h" |
25 | #include "llvm/IR/IntrinsicsS390.h" |
26 | #include "llvm/Support/CommandLine.h" |
27 | #include "llvm/Support/ErrorHandling.h" |
28 | #include "llvm/Support/KnownBits.h" |
29 | #include <cctype> |
30 | #include <optional> |
31 | |
32 | using namespace llvm; |
33 | |
34 | #define DEBUG_TYPE "systemz-lower" |
35 | |
36 | namespace { |
37 | // Represents information about a comparison. |
38 | struct Comparison { |
39 | Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) |
40 | : Op0(Op0In), Op1(Op1In), Chain(ChainIn), |
41 | Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} |
42 | |
43 | // The operands to the comparison. |
44 | SDValue Op0, Op1; |
45 | |
46 | // Chain if this is a strict floating-point comparison. |
47 | SDValue Chain; |
48 | |
49 | // The opcode that should be used to compare Op0 and Op1. |
50 | unsigned Opcode; |
51 | |
52 | // A SystemZICMP value. Only used for integer comparisons. |
53 | unsigned ICmpType; |
54 | |
55 | // The mask of CC values that Opcode can produce. |
56 | unsigned CCValid; |
57 | |
58 | // The mask of CC values for which the original condition is true. |
59 | unsigned CCMask; |
60 | }; |
61 | } // end anonymous namespace |
62 | |
63 | // Classify VT as either 32 or 64 bit. |
64 | static bool is32Bit(EVT VT) { |
65 | switch (VT.getSimpleVT().SimpleTy) { |
66 | case MVT::i32: |
67 | return true; |
68 | case MVT::i64: |
69 | return false; |
70 | default: |
71 | llvm_unreachable("Unsupported type" ); |
72 | } |
73 | } |
74 | |
75 | // Return a version of MachineOperand that can be safely used before the |
76 | // final use. |
77 | static MachineOperand earlyUseOperand(MachineOperand Op) { |
78 | if (Op.isReg()) |
79 | Op.setIsKill(false); |
80 | return Op; |
81 | } |
82 | |
83 | SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, |
84 | const SystemZSubtarget &STI) |
85 | : TargetLowering(TM), Subtarget(STI) { |
86 | MVT PtrVT = MVT::getIntegerVT(BitWidth: TM.getPointerSizeInBits(AS: 0)); |
87 | |
88 | auto *Regs = STI.getSpecialRegisters(); |
89 | |
90 | // Set up the register classes. |
91 | if (Subtarget.hasHighWord()) |
92 | addRegisterClass(MVT::VT: i32, RC: &SystemZ::GRX32BitRegClass); |
93 | else |
94 | addRegisterClass(MVT::VT: i32, RC: &SystemZ::GR32BitRegClass); |
95 | addRegisterClass(MVT::VT: i64, RC: &SystemZ::GR64BitRegClass); |
96 | if (!useSoftFloat()) { |
97 | if (Subtarget.hasVector()) { |
98 | addRegisterClass(MVT::VT: f32, RC: &SystemZ::VR32BitRegClass); |
99 | addRegisterClass(MVT::VT: f64, RC: &SystemZ::VR64BitRegClass); |
100 | } else { |
101 | addRegisterClass(MVT::VT: f32, RC: &SystemZ::FP32BitRegClass); |
102 | addRegisterClass(MVT::VT: f64, RC: &SystemZ::FP64BitRegClass); |
103 | } |
104 | if (Subtarget.hasVectorEnhancements1()) |
105 | addRegisterClass(MVT::VT: f128, RC: &SystemZ::VR128BitRegClass); |
106 | else |
107 | addRegisterClass(MVT::VT: f128, RC: &SystemZ::FP128BitRegClass); |
108 | |
109 | if (Subtarget.hasVector()) { |
110 | addRegisterClass(MVT::VT: v16i8, RC: &SystemZ::VR128BitRegClass); |
111 | addRegisterClass(MVT::VT: v8i16, RC: &SystemZ::VR128BitRegClass); |
112 | addRegisterClass(MVT::VT: v4i32, RC: &SystemZ::VR128BitRegClass); |
113 | addRegisterClass(MVT::VT: v2i64, RC: &SystemZ::VR128BitRegClass); |
114 | addRegisterClass(MVT::VT: v4f32, RC: &SystemZ::VR128BitRegClass); |
115 | addRegisterClass(MVT::VT: v2f64, RC: &SystemZ::VR128BitRegClass); |
116 | } |
117 | |
118 | if (Subtarget.hasVector()) |
119 | addRegisterClass(MVT::VT: i128, RC: &SystemZ::VR128BitRegClass); |
120 | } |
121 | |
122 | // Compute derived properties from the register classes |
123 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
124 | |
125 | // Set up special registers. |
126 | setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); |
127 | |
128 | // TODO: It may be better to default to latency-oriented scheduling, however |
129 | // LLVM's current latency-oriented scheduler can't handle physreg definitions |
130 | // such as SystemZ has with CC, so set this to the register-pressure |
131 | // scheduler, because it can. |
132 | setSchedulingPreference(Sched::RegPressure); |
133 | |
134 | setBooleanContents(ZeroOrOneBooleanContent); |
135 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
136 | |
137 | setMaxAtomicSizeInBitsSupported(128); |
138 | |
139 | // Instructions are strings of 2-byte aligned 2-byte values. |
140 | setMinFunctionAlignment(Align(2)); |
141 | // For performance reasons we prefer 16-byte alignment. |
142 | setPrefFunctionAlignment(Align(16)); |
143 | |
144 | // Handle operations that are handled in a similar way for all types. |
145 | for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; |
146 | I <= MVT::LAST_FP_VALUETYPE; |
147 | ++I) { |
148 | MVT VT = MVT::SimpleValueType(I); |
149 | if (isTypeLegal(VT)) { |
150 | // Lower SET_CC into an IPM-based sequence. |
151 | setOperationAction(Op: ISD::SETCC, VT, Action: Custom); |
152 | setOperationAction(Op: ISD::STRICT_FSETCC, VT, Action: Custom); |
153 | setOperationAction(Op: ISD::STRICT_FSETCCS, VT, Action: Custom); |
154 | |
155 | // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). |
156 | setOperationAction(Op: ISD::SELECT, VT, Action: Expand); |
157 | |
158 | // Lower SELECT_CC and BR_CC into separate comparisons and branches. |
159 | setOperationAction(Op: ISD::SELECT_CC, VT, Action: Custom); |
160 | setOperationAction(Op: ISD::BR_CC, VT, Action: Custom); |
161 | } |
162 | } |
163 | |
164 | // Expand jump table branches as address arithmetic followed by an |
165 | // indirect jump. |
166 | setOperationAction(ISD::BR_JT, MVT::Other, Expand); |
167 | |
168 | // Expand BRCOND into a BR_CC (see above). |
169 | setOperationAction(ISD::BRCOND, MVT::Other, Expand); |
170 | |
171 | // Handle integer types except i128. |
172 | for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; |
173 | I <= MVT::LAST_INTEGER_VALUETYPE; |
174 | ++I) { |
175 | MVT VT = MVT::SimpleValueType(I); |
176 | if (isTypeLegal(VT) && VT != MVT::i128) { |
177 | setOperationAction(Op: ISD::ABS, VT, Action: Legal); |
178 | |
179 | // Expand individual DIV and REMs into DIVREMs. |
180 | setOperationAction(Op: ISD::SDIV, VT, Action: Expand); |
181 | setOperationAction(Op: ISD::UDIV, VT, Action: Expand); |
182 | setOperationAction(Op: ISD::SREM, VT, Action: Expand); |
183 | setOperationAction(Op: ISD::UREM, VT, Action: Expand); |
184 | setOperationAction(Op: ISD::SDIVREM, VT, Action: Custom); |
185 | setOperationAction(Op: ISD::UDIVREM, VT, Action: Custom); |
186 | |
187 | // Support addition/subtraction with overflow. |
188 | setOperationAction(Op: ISD::SADDO, VT, Action: Custom); |
189 | setOperationAction(Op: ISD::SSUBO, VT, Action: Custom); |
190 | |
191 | // Support addition/subtraction with carry. |
192 | setOperationAction(Op: ISD::UADDO, VT, Action: Custom); |
193 | setOperationAction(Op: ISD::USUBO, VT, Action: Custom); |
194 | |
195 | // Support carry in as value rather than glue. |
196 | setOperationAction(Op: ISD::UADDO_CARRY, VT, Action: Custom); |
197 | setOperationAction(Op: ISD::USUBO_CARRY, VT, Action: Custom); |
198 | |
199 | // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are |
200 | // available, or if the operand is constant. |
201 | setOperationAction(Op: ISD::ATOMIC_LOAD_SUB, VT, Action: Custom); |
202 | |
203 | // Use POPCNT on z196 and above. |
204 | if (Subtarget.hasPopulationCount()) |
205 | setOperationAction(Op: ISD::CTPOP, VT, Action: Custom); |
206 | else |
207 | setOperationAction(Op: ISD::CTPOP, VT, Action: Expand); |
208 | |
209 | // No special instructions for these. |
210 | setOperationAction(Op: ISD::CTTZ, VT, Action: Expand); |
211 | setOperationAction(Op: ISD::ROTR, VT, Action: Expand); |
212 | |
213 | // Use *MUL_LOHI where possible instead of MULH*. |
214 | setOperationAction(Op: ISD::MULHS, VT, Action: Expand); |
215 | setOperationAction(Op: ISD::MULHU, VT, Action: Expand); |
216 | setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Custom); |
217 | setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Custom); |
218 | |
219 | // Only z196 and above have native support for conversions to unsigned. |
220 | // On z10, promoting to i64 doesn't generate an inexact condition for |
221 | // values that are outside the i32 range but in the i64 range, so use |
222 | // the default expansion. |
223 | if (!Subtarget.hasFPExtension()) |
224 | setOperationAction(Op: ISD::FP_TO_UINT, VT, Action: Expand); |
225 | |
226 | // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all |
227 | // default to Expand, so need to be modified to Legal where appropriate. |
228 | setOperationAction(Op: ISD::STRICT_FP_TO_SINT, VT, Action: Legal); |
229 | if (Subtarget.hasFPExtension()) |
230 | setOperationAction(Op: ISD::STRICT_FP_TO_UINT, VT, Action: Legal); |
231 | |
232 | // And similarly for STRICT_[SU]INT_TO_FP. |
233 | setOperationAction(Op: ISD::STRICT_SINT_TO_FP, VT, Action: Legal); |
234 | if (Subtarget.hasFPExtension()) |
235 | setOperationAction(Op: ISD::STRICT_UINT_TO_FP, VT, Action: Legal); |
236 | } |
237 | } |
238 | |
239 | // Handle i128 if legal. |
240 | if (isTypeLegal(MVT::i128)) { |
241 | // No special instructions for these. |
242 | setOperationAction(ISD::SDIVREM, MVT::i128, Expand); |
243 | setOperationAction(ISD::UDIVREM, MVT::i128, Expand); |
244 | setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); |
245 | setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); |
246 | setOperationAction(ISD::ROTR, MVT::i128, Expand); |
247 | setOperationAction(ISD::ROTL, MVT::i128, Expand); |
248 | setOperationAction(ISD::MUL, MVT::i128, Expand); |
249 | setOperationAction(ISD::MULHS, MVT::i128, Expand); |
250 | setOperationAction(ISD::MULHU, MVT::i128, Expand); |
251 | setOperationAction(ISD::SDIV, MVT::i128, Expand); |
252 | setOperationAction(ISD::UDIV, MVT::i128, Expand); |
253 | setOperationAction(ISD::SREM, MVT::i128, Expand); |
254 | setOperationAction(ISD::UREM, MVT::i128, Expand); |
255 | setOperationAction(ISD::CTLZ, MVT::i128, Expand); |
256 | setOperationAction(ISD::CTTZ, MVT::i128, Expand); |
257 | |
258 | // Support addition/subtraction with carry. |
259 | setOperationAction(ISD::UADDO, MVT::i128, Custom); |
260 | setOperationAction(ISD::USUBO, MVT::i128, Custom); |
261 | setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom); |
262 | setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom); |
263 | |
264 | // Use VPOPCT and add up partial results. |
265 | setOperationAction(ISD::CTPOP, MVT::i128, Custom); |
266 | |
267 | // We have to use libcalls for these. |
268 | setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall); |
269 | setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall); |
270 | setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall); |
271 | setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall); |
272 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall); |
273 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall); |
274 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall); |
275 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall); |
276 | } |
277 | |
278 | // Type legalization will convert 8- and 16-bit atomic operations into |
279 | // forms that operate on i32s (but still keeping the original memory VT). |
280 | // Lower them into full i32 operations. |
281 | setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); |
282 | setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); |
283 | setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); |
284 | setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); |
285 | setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); |
286 | setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); |
287 | setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); |
288 | setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); |
289 | setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); |
290 | setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); |
291 | setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); |
292 | |
293 | // Whether or not i128 is not a legal type, we need to custom lower |
294 | // the atomic operations in order to exploit SystemZ instructions. |
295 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); |
296 | setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); |
297 | |
298 | // Mark sign/zero extending atomic loads as legal, which will make |
299 | // DAGCombiner fold extensions into atomic loads if possible. |
300 | setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, |
301 | {MVT::i8, MVT::i16, MVT::i32}, Legal); |
302 | setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, |
303 | {MVT::i8, MVT::i16}, Legal); |
304 | setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16, |
305 | MVT::i8, Legal); |
306 | |
307 | // We can use the CC result of compare-and-swap to implement |
308 | // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. |
309 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); |
310 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); |
311 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
312 | |
313 | setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); |
314 | |
315 | // Traps are legal, as we will convert them to "j .+2". |
316 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
317 | |
318 | // z10 has instructions for signed but not unsigned FP conversion. |
319 | // Handle unsigned 32-bit types as signed 64-bit types. |
320 | if (!Subtarget.hasFPExtension()) { |
321 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); |
322 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); |
323 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); |
324 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); |
325 | } |
326 | |
327 | // We have native support for a 64-bit CTLZ, via FLOGR. |
328 | setOperationAction(ISD::CTLZ, MVT::i32, Promote); |
329 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); |
330 | setOperationAction(ISD::CTLZ, MVT::i64, Legal); |
331 | |
332 | // On z15 we have native support for a 64-bit CTPOP. |
333 | if (Subtarget.hasMiscellaneousExtensions3()) { |
334 | setOperationAction(ISD::CTPOP, MVT::i32, Promote); |
335 | setOperationAction(ISD::CTPOP, MVT::i64, Legal); |
336 | } |
337 | |
338 | // Give LowerOperation the chance to replace 64-bit ORs with subregs. |
339 | setOperationAction(ISD::OR, MVT::i64, Custom); |
340 | |
341 | // Expand 128 bit shifts without using a libcall. |
342 | setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); |
343 | setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); |
344 | setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); |
345 | setLibcallName(Call: RTLIB::SRL_I128, Name: nullptr); |
346 | setLibcallName(Call: RTLIB::SHL_I128, Name: nullptr); |
347 | setLibcallName(Call: RTLIB::SRA_I128, Name: nullptr); |
348 | |
349 | // Also expand 256 bit shifts if i128 is a legal type. |
350 | if (isTypeLegal(MVT::i128)) { |
351 | setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); |
352 | setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); |
353 | setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); |
354 | } |
355 | |
356 | // Handle bitcast from fp128 to i128. |
357 | if (!isTypeLegal(MVT::i128)) |
358 | setOperationAction(ISD::BITCAST, MVT::i128, Custom); |
359 | |
360 | // We have native instructions for i8, i16 and i32 extensions, but not i1. |
361 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); |
362 | for (MVT VT : MVT::integer_valuetypes()) { |
363 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
364 | setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); |
365 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); |
366 | } |
367 | |
368 | // Handle the various types of symbolic address. |
369 | setOperationAction(Op: ISD::ConstantPool, VT: PtrVT, Action: Custom); |
370 | setOperationAction(Op: ISD::GlobalAddress, VT: PtrVT, Action: Custom); |
371 | setOperationAction(Op: ISD::GlobalTLSAddress, VT: PtrVT, Action: Custom); |
372 | setOperationAction(Op: ISD::BlockAddress, VT: PtrVT, Action: Custom); |
373 | setOperationAction(Op: ISD::JumpTable, VT: PtrVT, Action: Custom); |
374 | |
375 | // We need to handle dynamic allocations specially because of the |
376 | // 160-byte area at the bottom of the stack. |
377 | setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: PtrVT, Action: Custom); |
378 | setOperationAction(Op: ISD::GET_DYNAMIC_AREA_OFFSET, VT: PtrVT, Action: Custom); |
379 | |
380 | setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); |
381 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); |
382 | |
383 | // Handle prefetches with PFD or PFDRL. |
384 | setOperationAction(ISD::PREFETCH, MVT::Other, Custom); |
385 | |
386 | // Handle readcyclecounter with STCKF. |
387 | setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); |
388 | |
389 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
390 | // Assume by default that all vector operations need to be expanded. |
391 | for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) |
392 | if (getOperationAction(Opcode, VT) == Legal) |
393 | setOperationAction(Opcode, VT, Expand); |
394 | |
395 | // Likewise all truncating stores and extending loads. |
396 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
397 | setTruncStoreAction(VT, InnerVT, Expand); |
398 | setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); |
399 | setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); |
400 | setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); |
401 | } |
402 | |
403 | if (isTypeLegal(VT)) { |
404 | // These operations are legal for anything that can be stored in a |
405 | // vector register, even if there is no native support for the format |
406 | // as such. In particular, we can do these for v4f32 even though there |
407 | // are no specific instructions for that format. |
408 | setOperationAction(ISD::LOAD, VT, Legal); |
409 | setOperationAction(ISD::STORE, VT, Legal); |
410 | setOperationAction(ISD::VSELECT, VT, Legal); |
411 | setOperationAction(ISD::BITCAST, VT, Legal); |
412 | setOperationAction(ISD::UNDEF, VT, Legal); |
413 | |
414 | // Likewise, except that we need to replace the nodes with something |
415 | // more specific. |
416 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
417 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
418 | } |
419 | } |
420 | |
421 | // Handle integer vector types. |
422 | for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { |
423 | if (isTypeLegal(VT)) { |
424 | // These operations have direct equivalents. |
425 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); |
426 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); |
427 | setOperationAction(ISD::ADD, VT, Legal); |
428 | setOperationAction(ISD::SUB, VT, Legal); |
429 | if (VT != MVT::v2i64) |
430 | setOperationAction(ISD::MUL, VT, Legal); |
431 | setOperationAction(ISD::ABS, VT, Legal); |
432 | setOperationAction(ISD::AND, VT, Legal); |
433 | setOperationAction(ISD::OR, VT, Legal); |
434 | setOperationAction(ISD::XOR, VT, Legal); |
435 | if (Subtarget.hasVectorEnhancements1()) |
436 | setOperationAction(ISD::CTPOP, VT, Legal); |
437 | else |
438 | setOperationAction(ISD::CTPOP, VT, Custom); |
439 | setOperationAction(ISD::CTTZ, VT, Legal); |
440 | setOperationAction(ISD::CTLZ, VT, Legal); |
441 | |
442 | // Convert a GPR scalar to a vector by inserting it into element 0. |
443 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
444 | |
445 | // Use a series of unpacks for extensions. |
446 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
447 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
448 | |
449 | // Detect shifts/rotates by a scalar amount and convert them into |
450 | // V*_BY_SCALAR. |
451 | setOperationAction(ISD::SHL, VT, Custom); |
452 | setOperationAction(ISD::SRA, VT, Custom); |
453 | setOperationAction(ISD::SRL, VT, Custom); |
454 | setOperationAction(ISD::ROTL, VT, Custom); |
455 | |
456 | // Add ISD::VECREDUCE_ADD as custom in order to implement |
457 | // it with VZERO+VSUM |
458 | setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); |
459 | |
460 | // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands |
461 | // and inverting the result as necessary. |
462 | setOperationAction(ISD::SETCC, VT, Custom); |
463 | } |
464 | } |
465 | |
466 | if (Subtarget.hasVector()) { |
467 | // There should be no need to check for float types other than v2f64 |
468 | // since <2 x f32> isn't a legal type. |
469 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); |
470 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); |
471 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); |
472 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); |
473 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); |
474 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); |
475 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); |
476 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); |
477 | |
478 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); |
479 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); |
480 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); |
481 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); |
482 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); |
483 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); |
484 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); |
485 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); |
486 | } |
487 | |
488 | if (Subtarget.hasVectorEnhancements2()) { |
489 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
490 | setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); |
491 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); |
492 | setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); |
493 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
494 | setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); |
495 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); |
496 | setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); |
497 | |
498 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); |
499 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); |
500 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); |
501 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); |
502 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); |
503 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); |
504 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); |
505 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); |
506 | } |
507 | |
508 | // Handle floating-point types. |
509 | for (unsigned I = MVT::FIRST_FP_VALUETYPE; |
510 | I <= MVT::LAST_FP_VALUETYPE; |
511 | ++I) { |
512 | MVT VT = MVT::SimpleValueType(I); |
513 | if (isTypeLegal(VT)) { |
514 | // We can use FI for FRINT. |
515 | setOperationAction(ISD::FRINT, VT, Legal); |
516 | |
517 | // We can use the extended form of FI for other rounding operations. |
518 | if (Subtarget.hasFPExtension()) { |
519 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
520 | setOperationAction(ISD::FFLOOR, VT, Legal); |
521 | setOperationAction(ISD::FCEIL, VT, Legal); |
522 | setOperationAction(ISD::FTRUNC, VT, Legal); |
523 | setOperationAction(ISD::FROUND, VT, Legal); |
524 | } |
525 | |
526 | // No special instructions for these. |
527 | setOperationAction(ISD::FSIN, VT, Expand); |
528 | setOperationAction(ISD::FCOS, VT, Expand); |
529 | setOperationAction(ISD::FSINCOS, VT, Expand); |
530 | setOperationAction(ISD::FREM, VT, Expand); |
531 | setOperationAction(ISD::FPOW, VT, Expand); |
532 | |
533 | // Special treatment. |
534 | setOperationAction(ISD::IS_FPCLASS, VT, Custom); |
535 | |
536 | // Handle constrained floating-point operations. |
537 | setOperationAction(ISD::STRICT_FADD, VT, Legal); |
538 | setOperationAction(ISD::STRICT_FSUB, VT, Legal); |
539 | setOperationAction(ISD::STRICT_FMUL, VT, Legal); |
540 | setOperationAction(ISD::STRICT_FDIV, VT, Legal); |
541 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
542 | setOperationAction(ISD::STRICT_FSQRT, VT, Legal); |
543 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
544 | setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); |
545 | setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); |
546 | if (Subtarget.hasFPExtension()) { |
547 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
548 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
549 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
550 | setOperationAction(ISD::STRICT_FROUND, VT, Legal); |
551 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
552 | } |
553 | } |
554 | } |
555 | |
556 | // Handle floating-point vector types. |
557 | if (Subtarget.hasVector()) { |
558 | // Scalar-to-vector conversion is just a subreg. |
559 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); |
560 | setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); |
561 | |
562 | // Some insertions and extractions can be done directly but others |
563 | // need to go via integers. |
564 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
565 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); |
566 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
567 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); |
568 | |
569 | // These operations have direct equivalents. |
570 | setOperationAction(ISD::FADD, MVT::v2f64, Legal); |
571 | setOperationAction(ISD::FNEG, MVT::v2f64, Legal); |
572 | setOperationAction(ISD::FSUB, MVT::v2f64, Legal); |
573 | setOperationAction(ISD::FMUL, MVT::v2f64, Legal); |
574 | setOperationAction(ISD::FMA, MVT::v2f64, Legal); |
575 | setOperationAction(ISD::FDIV, MVT::v2f64, Legal); |
576 | setOperationAction(ISD::FABS, MVT::v2f64, Legal); |
577 | setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); |
578 | setOperationAction(ISD::FRINT, MVT::v2f64, Legal); |
579 | setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); |
580 | setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); |
581 | setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); |
582 | setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); |
583 | setOperationAction(ISD::FROUND, MVT::v2f64, Legal); |
584 | |
585 | // Handle constrained floating-point operations. |
586 | setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); |
587 | setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); |
588 | setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); |
589 | setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); |
590 | setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); |
591 | setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); |
592 | setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); |
593 | setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); |
594 | setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); |
595 | setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); |
596 | setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); |
597 | setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); |
598 | |
599 | setOperationAction(ISD::SETCC, MVT::v2f64, Custom); |
600 | setOperationAction(ISD::SETCC, MVT::v4f32, Custom); |
601 | setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); |
602 | setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); |
603 | if (Subtarget.hasVectorEnhancements1()) { |
604 | setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); |
605 | setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); |
606 | } |
607 | } |
608 | |
609 | // The vector enhancements facility 1 has instructions for these. |
610 | if (Subtarget.hasVectorEnhancements1()) { |
611 | setOperationAction(ISD::FADD, MVT::v4f32, Legal); |
612 | setOperationAction(ISD::FNEG, MVT::v4f32, Legal); |
613 | setOperationAction(ISD::FSUB, MVT::v4f32, Legal); |
614 | setOperationAction(ISD::FMUL, MVT::v4f32, Legal); |
615 | setOperationAction(ISD::FMA, MVT::v4f32, Legal); |
616 | setOperationAction(ISD::FDIV, MVT::v4f32, Legal); |
617 | setOperationAction(ISD::FABS, MVT::v4f32, Legal); |
618 | setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); |
619 | setOperationAction(ISD::FRINT, MVT::v4f32, Legal); |
620 | setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); |
621 | setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); |
622 | setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); |
623 | setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); |
624 | setOperationAction(ISD::FROUND, MVT::v4f32, Legal); |
625 | |
626 | setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); |
627 | setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); |
628 | setOperationAction(ISD::FMINNUM, MVT::f64, Legal); |
629 | setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); |
630 | |
631 | setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); |
632 | setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); |
633 | setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); |
634 | setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); |
635 | |
636 | setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); |
637 | setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); |
638 | setOperationAction(ISD::FMINNUM, MVT::f32, Legal); |
639 | setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); |
640 | |
641 | setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); |
642 | setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); |
643 | setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); |
644 | setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); |
645 | |
646 | setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); |
647 | setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); |
648 | setOperationAction(ISD::FMINNUM, MVT::f128, Legal); |
649 | setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); |
650 | |
651 | // Handle constrained floating-point operations. |
652 | setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); |
653 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); |
654 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); |
655 | setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); |
656 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); |
657 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); |
658 | setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); |
659 | setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); |
660 | setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); |
661 | setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); |
662 | setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); |
663 | setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); |
664 | for (auto VT : { MVT::f32, MVT::f64, MVT::f128, |
665 | MVT::v4f32, MVT::v2f64 }) { |
666 | setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); |
667 | setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); |
668 | setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); |
669 | setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); |
670 | } |
671 | } |
672 | |
673 | // We only have fused f128 multiply-addition on vector registers. |
674 | if (!Subtarget.hasVectorEnhancements1()) { |
675 | setOperationAction(ISD::FMA, MVT::f128, Expand); |
676 | setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); |
677 | } |
678 | |
679 | // We don't have a copysign instruction on vector registers. |
680 | if (Subtarget.hasVectorEnhancements1()) |
681 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); |
682 | |
683 | // Needed so that we don't try to implement f128 constant loads using |
684 | // a load-and-extend of a f80 constant (in cases where the constant |
685 | // would fit in an f80). |
686 | for (MVT VT : MVT::fp_valuetypes()) |
687 | setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); |
688 | |
689 | // We don't have extending load instruction on vector registers. |
690 | if (Subtarget.hasVectorEnhancements1()) { |
691 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
692 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
693 | } |
694 | |
695 | // Floating-point truncation and stores need to be done separately. |
696 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
697 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
698 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
699 | |
700 | // We have 64-bit FPR<->GPR moves, but need special handling for |
701 | // 32-bit forms. |
702 | if (!Subtarget.hasVector()) { |
703 | setOperationAction(ISD::BITCAST, MVT::i32, Custom); |
704 | setOperationAction(ISD::BITCAST, MVT::f32, Custom); |
705 | } |
706 | |
707 | // VASTART and VACOPY need to deal with the SystemZ-specific varargs |
708 | // structure, but VAEND is a no-op. |
709 | setOperationAction(ISD::VASTART, MVT::Other, Custom); |
710 | setOperationAction(ISD::VACOPY, MVT::Other, Custom); |
711 | setOperationAction(ISD::VAEND, MVT::Other, Expand); |
712 | |
713 | setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); |
714 | |
715 | // Codes for which we want to perform some z-specific combinations. |
716 | setTargetDAGCombine({ISD::ZERO_EXTEND, |
717 | ISD::SIGN_EXTEND, |
718 | ISD::SIGN_EXTEND_INREG, |
719 | ISD::LOAD, |
720 | ISD::STORE, |
721 | ISD::VECTOR_SHUFFLE, |
722 | ISD::EXTRACT_VECTOR_ELT, |
723 | ISD::FP_ROUND, |
724 | ISD::STRICT_FP_ROUND, |
725 | ISD::FP_EXTEND, |
726 | ISD::SINT_TO_FP, |
727 | ISD::UINT_TO_FP, |
728 | ISD::STRICT_FP_EXTEND, |
729 | ISD::BSWAP, |
730 | ISD::SDIV, |
731 | ISD::UDIV, |
732 | ISD::SREM, |
733 | ISD::UREM, |
734 | ISD::INTRINSIC_VOID, |
735 | ISD::INTRINSIC_W_CHAIN}); |
736 | |
737 | // Handle intrinsics. |
738 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
739 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
740 | |
741 | // We want to use MVC in preference to even a single load/store pair. |
742 | MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; |
743 | MaxStoresPerMemcpyOptSize = 0; |
744 | |
745 | // The main memset sequence is a byte store followed by an MVC. |
746 | // Two STC or MV..I stores win over that, but the kind of fused stores |
747 | // generated by target-independent code don't when the byte value is |
748 | // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better |
749 | // than "STC;MVC". Handle the choice in target-specific code instead. |
750 | MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; |
751 | MaxStoresPerMemsetOptSize = 0; |
752 | |
753 | // Default to having -disable-strictnode-mutation on |
754 | IsStrictFPEnabled = true; |
755 | |
756 | if (Subtarget.isTargetzOS()) { |
757 | struct RTLibCallMapping { |
758 | RTLIB::Libcall Code; |
759 | const char *Name; |
760 | }; |
761 | static RTLibCallMapping RTLibCallCommon[] = { |
762 | #define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, |
763 | #include "ZOSLibcallNames.def" |
764 | }; |
765 | for (auto &E : RTLibCallCommon) |
766 | setLibcallName(Call: E.Code, Name: E.Name); |
767 | } |
768 | } |
769 | |
770 | bool SystemZTargetLowering::useSoftFloat() const { |
771 | return Subtarget.hasSoftFloat(); |
772 | } |
773 | |
774 | EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, |
775 | LLVMContext &, EVT VT) const { |
776 | if (!VT.isVector()) |
777 | return MVT::i32; |
778 | return VT.changeVectorElementTypeToInteger(); |
779 | } |
780 | |
781 | bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( |
782 | const MachineFunction &MF, EVT VT) const { |
783 | VT = VT.getScalarType(); |
784 | |
785 | if (!VT.isSimple()) |
786 | return false; |
787 | |
788 | switch (VT.getSimpleVT().SimpleTy) { |
789 | case MVT::f32: |
790 | case MVT::f64: |
791 | return true; |
792 | case MVT::f128: |
793 | return Subtarget.hasVectorEnhancements1(); |
794 | default: |
795 | break; |
796 | } |
797 | |
798 | return false; |
799 | } |
800 | |
801 | // Return true if the constant can be generated with a vector instruction, |
802 | // such as VGM, VGMB or VREPI. |
803 | bool SystemZVectorConstantInfo::isVectorConstantLegal( |
804 | const SystemZSubtarget &Subtarget) { |
805 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
806 | if (!Subtarget.hasVector() || |
807 | (isFP128 && !Subtarget.hasVectorEnhancements1())) |
808 | return false; |
809 | |
810 | // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- |
811 | // preferred way of creating all-zero and all-one vectors so give it |
812 | // priority over other methods below. |
813 | unsigned Mask = 0; |
814 | unsigned I = 0; |
815 | for (; I < SystemZ::VectorBytes; ++I) { |
816 | uint64_t Byte = IntBits.lshr(shiftAmt: I * 8).trunc(width: 8).getZExtValue(); |
817 | if (Byte == 0xff) |
818 | Mask |= 1ULL << I; |
819 | else if (Byte != 0) |
820 | break; |
821 | } |
822 | if (I == SystemZ::VectorBytes) { |
823 | Opcode = SystemZISD::BYTE_MASK; |
824 | OpVals.push_back(Elt: Mask); |
825 | VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8), NumElements: 16); |
826 | return true; |
827 | } |
828 | |
829 | if (SplatBitSize > 64) |
830 | return false; |
831 | |
832 | auto tryValue = [&](uint64_t Value) -> bool { |
833 | // Try VECTOR REPLICATE IMMEDIATE |
834 | int64_t SignedValue = SignExtend64(X: Value, B: SplatBitSize); |
835 | if (isInt<16>(x: SignedValue)) { |
836 | OpVals.push_back(Elt: ((unsigned) SignedValue)); |
837 | Opcode = SystemZISD::REPLICATE; |
838 | VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SplatBitSize), |
839 | NumElements: SystemZ::VectorBits / SplatBitSize); |
840 | return true; |
841 | } |
842 | // Try VECTOR GENERATE MASK |
843 | unsigned Start, End; |
844 | if (TII->isRxSBGMask(Mask: Value, BitSize: SplatBitSize, Start, End)) { |
845 | // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 |
846 | // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for |
847 | // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). |
848 | OpVals.push_back(Elt: Start - (64 - SplatBitSize)); |
849 | OpVals.push_back(Elt: End - (64 - SplatBitSize)); |
850 | Opcode = SystemZISD::ROTATE_MASK; |
851 | VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: SplatBitSize), |
852 | NumElements: SystemZ::VectorBits / SplatBitSize); |
853 | return true; |
854 | } |
855 | return false; |
856 | }; |
857 | |
858 | // First try assuming that any undefined bits above the highest set bit |
859 | // and below the lowest set bit are 1s. This increases the likelihood of |
860 | // being able to use a sign-extended element value in VECTOR REPLICATE |
861 | // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. |
862 | uint64_t SplatBitsZ = SplatBits.getZExtValue(); |
863 | uint64_t SplatUndefZ = SplatUndef.getZExtValue(); |
864 | unsigned LowerBits = llvm::countr_zero(Val: SplatBitsZ); |
865 | unsigned UpperBits = llvm::countl_zero(Val: SplatBitsZ); |
866 | uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(N: LowerBits); |
867 | uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(N: UpperBits); |
868 | if (tryValue(SplatBitsZ | Upper | Lower)) |
869 | return true; |
870 | |
871 | // Now try assuming that any undefined bits between the first and |
872 | // last defined set bits are set. This increases the chances of |
873 | // using a non-wraparound mask. |
874 | uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; |
875 | return tryValue(SplatBitsZ | Middle); |
876 | } |
877 | |
878 | SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { |
879 | if (IntImm.isSingleWord()) { |
880 | IntBits = APInt(128, IntImm.getZExtValue()); |
881 | IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); |
882 | } else |
883 | IntBits = IntImm; |
884 | assert(IntBits.getBitWidth() == 128 && "Unsupported APInt." ); |
885 | |
886 | // Find the smallest splat. |
887 | SplatBits = IntImm; |
888 | unsigned Width = SplatBits.getBitWidth(); |
889 | while (Width > 8) { |
890 | unsigned HalfSize = Width / 2; |
891 | APInt HighValue = SplatBits.lshr(shiftAmt: HalfSize).trunc(width: HalfSize); |
892 | APInt LowValue = SplatBits.trunc(width: HalfSize); |
893 | |
894 | // If the two halves do not match, stop here. |
895 | if (HighValue != LowValue || 8 > HalfSize) |
896 | break; |
897 | |
898 | SplatBits = HighValue; |
899 | Width = HalfSize; |
900 | } |
901 | SplatUndef = 0; |
902 | SplatBitSize = Width; |
903 | } |
904 | |
905 | SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { |
906 | assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR" ); |
907 | bool HasAnyUndefs; |
908 | |
909 | // Get IntBits by finding the 128 bit splat. |
910 | BVN->isConstantSplat(SplatValue&: IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 128, |
911 | isBigEndian: true); |
912 | |
913 | // Get SplatBits by finding the 8 bit or greater splat. |
914 | BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, MinSplatBits: 8, |
915 | isBigEndian: true); |
916 | } |
917 | |
918 | bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
919 | bool ForCodeSize) const { |
920 | // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. |
921 | if (Imm.isZero() || Imm.isNegZero()) |
922 | return true; |
923 | |
924 | return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); |
925 | } |
926 | |
927 | /// Returns true if stack probing through inline assembly is requested. |
928 | bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { |
929 | // If the function specifically requests inline stack probes, emit them. |
930 | if (MF.getFunction().hasFnAttribute(Kind: "probe-stack" )) |
931 | return MF.getFunction().getFnAttribute(Kind: "probe-stack" ).getValueAsString() == |
932 | "inline-asm" ; |
933 | return false; |
934 | } |
935 | |
936 | TargetLowering::AtomicExpansionKind |
937 | SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { |
938 | // Lower fp128 the same way as i128. |
939 | if (LI->getType()->isFP128Ty()) |
940 | return AtomicExpansionKind::CastToInteger; |
941 | return AtomicExpansionKind::None; |
942 | } |
943 | |
944 | TargetLowering::AtomicExpansionKind |
945 | SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { |
946 | // Lower fp128 the same way as i128. |
947 | if (SI->getValueOperand()->getType()->isFP128Ty()) |
948 | return AtomicExpansionKind::CastToInteger; |
949 | return AtomicExpansionKind::None; |
950 | } |
951 | |
952 | TargetLowering::AtomicExpansionKind |
953 | SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
954 | // Don't expand subword operations as they require special treatment. |
955 | if (RMW->getType()->isIntegerTy(Bitwidth: 8) || RMW->getType()->isIntegerTy(Bitwidth: 16)) |
956 | return AtomicExpansionKind::None; |
957 | |
958 | // Don't expand if there is a target instruction available. |
959 | if (Subtarget.hasInterlockedAccess1() && |
960 | (RMW->getType()->isIntegerTy(Bitwidth: 32) || RMW->getType()->isIntegerTy(Bitwidth: 64)) && |
961 | (RMW->getOperation() == AtomicRMWInst::BinOp::Add || |
962 | RMW->getOperation() == AtomicRMWInst::BinOp::Sub || |
963 | RMW->getOperation() == AtomicRMWInst::BinOp::And || |
964 | RMW->getOperation() == AtomicRMWInst::BinOp::Or || |
965 | RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) |
966 | return AtomicExpansionKind::None; |
967 | |
968 | return AtomicExpansionKind::CmpXChg; |
969 | } |
970 | |
971 | bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
972 | // We can use CGFI or CLGFI. |
973 | return isInt<32>(x: Imm) || isUInt<32>(x: Imm); |
974 | } |
975 | |
976 | bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { |
977 | // We can use ALGFI or SLGFI. |
978 | return isUInt<32>(x: Imm) || isUInt<32>(x: -Imm); |
979 | } |
980 | |
981 | bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( |
982 | EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { |
983 | // Unaligned accesses should never be slower than the expanded version. |
984 | // We check specifically for aligned accesses in the few cases where |
985 | // they are required. |
986 | if (Fast) |
987 | *Fast = 1; |
988 | return true; |
989 | } |
990 | |
// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg)
      : LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};
1002 | |
1003 | // Return the desired addressing mode for a Load which has only one use (in |
1004 | // the same block) which is a Store. |
1005 | static AddressingMode getLoadStoreAddrMode(bool HasVector, |
1006 | Type *Ty) { |
1007 | // With vector support a Load->Store combination may be combined to either |
1008 | // an MVC or vector operations and it seems to work best to allow the |
1009 | // vector addressing mode. |
1010 | if (HasVector) |
1011 | return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); |
1012 | |
1013 | // Otherwise only the MVC case is special. |
1014 | bool MVC = Ty->isIntegerTy(Bitwidth: 8); |
1015 | return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); |
1016 | } |
1017 | |
1018 | // Return the addressing mode which seems most desirable given an LLVM |
1019 | // Instruction pointer. |
1020 | static AddressingMode |
1021 | supportedAddressingMode(Instruction *I, bool HasVector) { |
1022 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: I)) { |
1023 | switch (II->getIntrinsicID()) { |
1024 | default: break; |
1025 | case Intrinsic::memset: |
1026 | case Intrinsic::memmove: |
1027 | case Intrinsic::memcpy: |
1028 | return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); |
1029 | } |
1030 | } |
1031 | |
1032 | if (isa<LoadInst>(Val: I) && I->hasOneUse()) { |
1033 | auto *SingleUser = cast<Instruction>(Val: *I->user_begin()); |
1034 | if (SingleUser->getParent() == I->getParent()) { |
1035 | if (isa<ICmpInst>(Val: SingleUser)) { |
1036 | if (auto *C = dyn_cast<ConstantInt>(Val: SingleUser->getOperand(i: 1))) |
1037 | if (C->getBitWidth() <= 64 && |
1038 | (isInt<16>(x: C->getSExtValue()) || isUInt<16>(x: C->getZExtValue()))) |
1039 | // Comparison of memory with 16 bit signed / unsigned immediate |
1040 | return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); |
1041 | } else if (isa<StoreInst>(Val: SingleUser)) |
1042 | // Load->Store |
1043 | return getLoadStoreAddrMode(HasVector, Ty: I->getType()); |
1044 | } |
1045 | } else if (auto *StoreI = dyn_cast<StoreInst>(Val: I)) { |
1046 | if (auto *LoadI = dyn_cast<LoadInst>(Val: StoreI->getValueOperand())) |
1047 | if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) |
1048 | // Load->Store |
1049 | return getLoadStoreAddrMode(HasVector, Ty: LoadI->getType()); |
1050 | } |
1051 | |
1052 | if (HasVector && (isa<LoadInst>(Val: I) || isa<StoreInst>(Val: I))) { |
1053 | |
1054 | // * Use LDE instead of LE/LEY for z13 to avoid partial register |
1055 | // dependencies (LDE only supports small offsets). |
1056 | // * Utilize the vector registers to hold floating point |
1057 | // values (vector load / store instructions only support small |
1058 | // offsets). |
1059 | |
1060 | Type *MemAccessTy = (isa<LoadInst>(Val: I) ? I->getType() : |
1061 | I->getOperand(i: 0)->getType()); |
1062 | bool IsFPAccess = MemAccessTy->isFloatingPointTy(); |
1063 | bool IsVectorAccess = MemAccessTy->isVectorTy(); |
1064 | |
1065 | // A store of an extracted vector element will be combined into a VSTE type |
1066 | // instruction. |
1067 | if (!IsVectorAccess && isa<StoreInst>(Val: I)) { |
1068 | Value *DataOp = I->getOperand(i: 0); |
1069 | if (isa<ExtractElementInst>(Val: DataOp)) |
1070 | IsVectorAccess = true; |
1071 | } |
1072 | |
1073 | // A load which gets inserted into a vector element will be combined into a |
1074 | // VLE type instruction. |
1075 | if (!IsVectorAccess && isa<LoadInst>(Val: I) && I->hasOneUse()) { |
1076 | User *LoadUser = *I->user_begin(); |
1077 | if (isa<InsertElementInst>(Val: LoadUser)) |
1078 | IsVectorAccess = true; |
1079 | } |
1080 | |
1081 | if (IsFPAccess || IsVectorAccess) |
1082 | return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); |
1083 | } |
1084 | |
1085 | return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); |
1086 | } |
1087 | |
1088 | bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, |
1089 | const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { |
1090 | // Punt on globals for now, although they can be used in limited |
1091 | // RELATIVE LONG cases. |
1092 | if (AM.BaseGV) |
1093 | return false; |
1094 | |
1095 | // Require a 20-bit signed offset. |
1096 | if (!isInt<20>(x: AM.BaseOffs)) |
1097 | return false; |
1098 | |
1099 | bool RequireD12 = |
1100 | Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(Bitwidth: 128)); |
1101 | AddressingMode SupportedAM(!RequireD12, true); |
1102 | if (I != nullptr) |
1103 | SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); |
1104 | |
1105 | if (!SupportedAM.LongDisplacement && !isUInt<12>(x: AM.BaseOffs)) |
1106 | return false; |
1107 | |
1108 | if (!SupportedAM.IndexReg) |
1109 | // No indexing allowed. |
1110 | return AM.Scale == 0; |
1111 | else |
1112 | // Indexing is OK but no scale factor can be applied. |
1113 | return AM.Scale == 0 || AM.Scale == 1; |
1114 | } |
1115 | |
1116 | bool SystemZTargetLowering::findOptimalMemOpLowering( |
1117 | std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, |
1118 | unsigned SrcAS, const AttributeList &FuncAttributes) const { |
1119 | const int MVCFastLen = 16; |
1120 | |
1121 | if (Limit != ~unsigned(0)) { |
1122 | // Don't expand Op into scalar loads/stores in these cases: |
1123 | if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) |
1124 | return false; // Small memcpy: Use MVC |
1125 | if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) |
1126 | return false; // Small memset (first byte with STC/MVI): Use MVC |
1127 | if (Op.isZeroMemset()) |
1128 | return false; // Memset zero: Use XC |
1129 | } |
1130 | |
1131 | return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, |
1132 | SrcAS, FuncAttributes); |
1133 | } |
1134 | |
1135 | EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, |
1136 | const AttributeList &FuncAttributes) const { |
1137 | return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; |
1138 | } |
1139 | |
1140 | bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { |
1141 | if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) |
1142 | return false; |
1143 | unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); |
1144 | unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); |
1145 | return FromBits > ToBits; |
1146 | } |
1147 | |
1148 | bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { |
1149 | if (!FromVT.isInteger() || !ToVT.isInteger()) |
1150 | return false; |
1151 | unsigned FromBits = FromVT.getFixedSizeInBits(); |
1152 | unsigned ToBits = ToVT.getFixedSizeInBits(); |
1153 | return FromBits > ToBits; |
1154 | } |
1155 | |
1156 | //===----------------------------------------------------------------------===// |
1157 | // Inline asm support |
1158 | //===----------------------------------------------------------------------===// |
1159 | |
1160 | TargetLowering::ConstraintType |
1161 | SystemZTargetLowering::getConstraintType(StringRef Constraint) const { |
1162 | if (Constraint.size() == 1) { |
1163 | switch (Constraint[0]) { |
1164 | case 'a': // Address register |
1165 | case 'd': // Data register (equivalent to 'r') |
1166 | case 'f': // Floating-point register |
1167 | case 'h': // High-part register |
1168 | case 'r': // General-purpose register |
1169 | case 'v': // Vector register |
1170 | return C_RegisterClass; |
1171 | |
1172 | case 'Q': // Memory with base and unsigned 12-bit displacement |
1173 | case 'R': // Likewise, plus an index |
1174 | case 'S': // Memory with base and signed 20-bit displacement |
1175 | case 'T': // Likewise, plus an index |
1176 | case 'm': // Equivalent to 'T'. |
1177 | return C_Memory; |
1178 | |
1179 | case 'I': // Unsigned 8-bit constant |
1180 | case 'J': // Unsigned 12-bit constant |
1181 | case 'K': // Signed 16-bit constant |
1182 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1183 | case 'M': // 0x7fffffff |
1184 | return C_Immediate; |
1185 | |
1186 | default: |
1187 | break; |
1188 | } |
1189 | } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { |
1190 | switch (Constraint[1]) { |
1191 | case 'Q': // Address with base and unsigned 12-bit displacement |
1192 | case 'R': // Likewise, plus an index |
1193 | case 'S': // Address with base and signed 20-bit displacement |
1194 | case 'T': // Likewise, plus an index |
1195 | return C_Address; |
1196 | |
1197 | default: |
1198 | break; |
1199 | } |
1200 | } |
1201 | return TargetLowering::getConstraintType(Constraint); |
1202 | } |
1203 | |
1204 | TargetLowering::ConstraintWeight SystemZTargetLowering:: |
1205 | getSingleConstraintMatchWeight(AsmOperandInfo &info, |
1206 | const char *constraint) const { |
1207 | ConstraintWeight weight = CW_Invalid; |
1208 | Value *CallOperandVal = info.CallOperandVal; |
1209 | // If we don't have a value, we can't do a match, |
1210 | // but allow it at the lowest weight. |
1211 | if (!CallOperandVal) |
1212 | return CW_Default; |
1213 | Type *type = CallOperandVal->getType(); |
1214 | // Look at the constraint type. |
1215 | switch (*constraint) { |
1216 | default: |
1217 | weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); |
1218 | break; |
1219 | |
1220 | case 'a': // Address register |
1221 | case 'd': // Data register (equivalent to 'r') |
1222 | case 'h': // High-part register |
1223 | case 'r': // General-purpose register |
1224 | weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; |
1225 | break; |
1226 | |
1227 | case 'f': // Floating-point register |
1228 | if (!useSoftFloat()) |
1229 | weight = type->isFloatingPointTy() ? CW_Register : CW_Default; |
1230 | break; |
1231 | |
1232 | case 'v': // Vector register |
1233 | if (Subtarget.hasVector()) |
1234 | weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register |
1235 | : CW_Default; |
1236 | break; |
1237 | |
1238 | case 'I': // Unsigned 8-bit constant |
1239 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1240 | if (isUInt<8>(x: C->getZExtValue())) |
1241 | weight = CW_Constant; |
1242 | break; |
1243 | |
1244 | case 'J': // Unsigned 12-bit constant |
1245 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1246 | if (isUInt<12>(x: C->getZExtValue())) |
1247 | weight = CW_Constant; |
1248 | break; |
1249 | |
1250 | case 'K': // Signed 16-bit constant |
1251 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1252 | if (isInt<16>(x: C->getSExtValue())) |
1253 | weight = CW_Constant; |
1254 | break; |
1255 | |
1256 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1257 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1258 | if (isInt<20>(x: C->getSExtValue())) |
1259 | weight = CW_Constant; |
1260 | break; |
1261 | |
1262 | case 'M': // 0x7fffffff |
1263 | if (auto *C = dyn_cast<ConstantInt>(Val: CallOperandVal)) |
1264 | if (C->getZExtValue() == 0x7fffffff) |
1265 | weight = CW_Constant; |
1266 | break; |
1267 | } |
1268 | return weight; |
1269 | } |
1270 | |
1271 | // Parse a "{tNNN}" register constraint for which the register type "t" |
1272 | // has already been verified. MC is the class associated with "t" and |
1273 | // Map maps 0-based register numbers to LLVM register numbers. |
1274 | static std::pair<unsigned, const TargetRegisterClass *> |
1275 | parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, |
1276 | const unsigned *Map, unsigned Size) { |
1277 | assert(*(Constraint.end()-1) == '}' && "Missing '}'" ); |
1278 | if (isdigit(Constraint[2])) { |
1279 | unsigned Index; |
1280 | bool Failed = |
1281 | Constraint.slice(Start: 2, End: Constraint.size() - 1).getAsInteger(Radix: 10, Result&: Index); |
1282 | if (!Failed && Index < Size && Map[Index]) |
1283 | return std::make_pair(x: Map[Index], y&: RC); |
1284 | } |
1285 | return std::make_pair(x: 0U, y: nullptr); |
1286 | } |
1287 | |
1288 | std::pair<unsigned, const TargetRegisterClass *> |
1289 | SystemZTargetLowering::getRegForInlineAsmConstraint( |
1290 | const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
1291 | if (Constraint.size() == 1) { |
1292 | // GCC Constraint Letters |
1293 | switch (Constraint[0]) { |
1294 | default: break; |
1295 | case 'd': // Data register (equivalent to 'r') |
1296 | case 'r': // General-purpose register |
1297 | if (VT.getSizeInBits() == 64) |
1298 | return std::make_pair(0U, &SystemZ::GR64BitRegClass); |
1299 | else if (VT.getSizeInBits() == 128) |
1300 | return std::make_pair(0U, &SystemZ::GR128BitRegClass); |
1301 | return std::make_pair(0U, &SystemZ::GR32BitRegClass); |
1302 | |
1303 | case 'a': // Address register |
1304 | if (VT == MVT::i64) |
1305 | return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); |
1306 | else if (VT == MVT::i128) |
1307 | return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); |
1308 | return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); |
1309 | |
1310 | case 'h': // High-part register (an LLVM extension) |
1311 | return std::make_pair(0U, &SystemZ::GRH32BitRegClass); |
1312 | |
1313 | case 'f': // Floating-point register |
1314 | if (!useSoftFloat()) { |
1315 | if (VT.getSizeInBits() == 64) |
1316 | return std::make_pair(0U, &SystemZ::FP64BitRegClass); |
1317 | else if (VT.getSizeInBits() == 128) |
1318 | return std::make_pair(0U, &SystemZ::FP128BitRegClass); |
1319 | return std::make_pair(0U, &SystemZ::FP32BitRegClass); |
1320 | } |
1321 | break; |
1322 | |
1323 | case 'v': // Vector register |
1324 | if (Subtarget.hasVector()) { |
1325 | if (VT.getSizeInBits() == 32) |
1326 | return std::make_pair(0U, &SystemZ::VR32BitRegClass); |
1327 | if (VT.getSizeInBits() == 64) |
1328 | return std::make_pair(0U, &SystemZ::VR64BitRegClass); |
1329 | return std::make_pair(0U, &SystemZ::VR128BitRegClass); |
1330 | } |
1331 | break; |
1332 | } |
1333 | } |
1334 | if (Constraint.starts_with(Prefix: "{" )) { |
1335 | |
1336 | // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal |
1337 | // to check the size on. |
1338 | auto getVTSizeInBits = [&VT]() { |
1339 | return VT == MVT::Other ? 0 : VT.getSizeInBits(); |
1340 | }; |
1341 | |
1342 | // We need to override the default register parsing for GPRs and FPRs |
1343 | // because the interpretation depends on VT. The internal names of |
1344 | // the registers are also different from the external names |
1345 | // (F0D and F0S instead of F0, etc.). |
1346 | if (Constraint[1] == 'r') { |
1347 | if (getVTSizeInBits() == 32) |
1348 | return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, |
1349 | SystemZMC::GR32Regs, 16); |
1350 | if (getVTSizeInBits() == 128) |
1351 | return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, |
1352 | SystemZMC::GR128Regs, 16); |
1353 | return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, |
1354 | SystemZMC::GR64Regs, 16); |
1355 | } |
1356 | if (Constraint[1] == 'f') { |
1357 | if (useSoftFloat()) |
1358 | return std::make_pair( |
1359 | x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr)); |
1360 | if (getVTSizeInBits() == 32) |
1361 | return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, |
1362 | SystemZMC::FP32Regs, 16); |
1363 | if (getVTSizeInBits() == 128) |
1364 | return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, |
1365 | SystemZMC::FP128Regs, 16); |
1366 | return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, |
1367 | SystemZMC::FP64Regs, 16); |
1368 | } |
1369 | if (Constraint[1] == 'v') { |
1370 | if (!Subtarget.hasVector()) |
1371 | return std::make_pair( |
1372 | x: 0u, y: static_cast<const TargetRegisterClass *>(nullptr)); |
1373 | if (getVTSizeInBits() == 32) |
1374 | return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, |
1375 | SystemZMC::VR32Regs, 32); |
1376 | if (getVTSizeInBits() == 64) |
1377 | return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, |
1378 | SystemZMC::VR64Regs, 32); |
1379 | return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, |
1380 | SystemZMC::VR128Regs, 32); |
1381 | } |
1382 | } |
1383 | return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
1384 | } |
1385 | |
1386 | // FIXME? Maybe this could be a TableGen attribute on some registers and |
1387 | // this table could be generated automatically from RegInfo. |
1388 | Register |
1389 | SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, |
1390 | const MachineFunction &MF) const { |
1391 | Register Reg = |
1392 | StringSwitch<Register>(RegName) |
1393 | .Case("r4" , Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0) |
1394 | .Case("r15" , Subtarget.isTargetELF() ? SystemZ::R15D : 0) |
1395 | .Default(0); |
1396 | |
1397 | if (Reg) |
1398 | return Reg; |
1399 | report_fatal_error(reason: "Invalid register name global variable" ); |
1400 | } |
1401 | |
1402 | Register SystemZTargetLowering::getExceptionPointerRegister( |
1403 | const Constant *PersonalityFn) const { |
1404 | return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; |
1405 | } |
1406 | |
1407 | Register SystemZTargetLowering::getExceptionSelectorRegister( |
1408 | const Constant *PersonalityFn) const { |
1409 | return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; |
1410 | } |
1411 | |
1412 | void SystemZTargetLowering::LowerAsmOperandForConstraint( |
1413 | SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, |
1414 | SelectionDAG &DAG) const { |
1415 | // Only support length 1 constraints for now. |
1416 | if (Constraint.size() == 1) { |
1417 | switch (Constraint[0]) { |
1418 | case 'I': // Unsigned 8-bit constant |
1419 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1420 | if (isUInt<8>(x: C->getZExtValue())) |
1421 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1422 | VT: Op.getValueType())); |
1423 | return; |
1424 | |
1425 | case 'J': // Unsigned 12-bit constant |
1426 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1427 | if (isUInt<12>(x: C->getZExtValue())) |
1428 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1429 | VT: Op.getValueType())); |
1430 | return; |
1431 | |
1432 | case 'K': // Signed 16-bit constant |
1433 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1434 | if (isInt<16>(x: C->getSExtValue())) |
1435 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getSExtValue(), DL: SDLoc(Op), |
1436 | VT: Op.getValueType())); |
1437 | return; |
1438 | |
1439 | case 'L': // Signed 20-bit displacement (on all targets we support) |
1440 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1441 | if (isInt<20>(x: C->getSExtValue())) |
1442 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getSExtValue(), DL: SDLoc(Op), |
1443 | VT: Op.getValueType())); |
1444 | return; |
1445 | |
1446 | case 'M': // 0x7fffffff |
1447 | if (auto *C = dyn_cast<ConstantSDNode>(Val&: Op)) |
1448 | if (C->getZExtValue() == 0x7fffffff) |
1449 | Ops.push_back(x: DAG.getTargetConstant(Val: C->getZExtValue(), DL: SDLoc(Op), |
1450 | VT: Op.getValueType())); |
1451 | return; |
1452 | } |
1453 | } |
1454 | TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
1455 | } |
1456 | |
1457 | //===----------------------------------------------------------------------===// |
1458 | // Calling conventions |
1459 | //===----------------------------------------------------------------------===// |
1460 | |
1461 | #include "SystemZGenCallingConv.inc" |
1462 | |
1463 | const MCPhysReg *SystemZTargetLowering::getScratchRegisters( |
1464 | CallingConv::ID) const { |
1465 | static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, |
1466 | SystemZ::R14D, 0 }; |
1467 | return ScratchRegs; |
1468 | } |
1469 | |
1470 | bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, |
1471 | Type *ToType) const { |
1472 | return isTruncateFree(FromType, ToType); |
1473 | } |
1474 | |
1475 | bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
1476 | return CI->isTailCall(); |
1477 | } |
1478 | |
1479 | // Value is a value that has been passed to us in the location described by VA |
1480 | // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining |
1481 | // any loads onto Chain. |
1482 | static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, |
1483 | CCValAssign &VA, SDValue Chain, |
1484 | SDValue Value) { |
1485 | // If the argument has been promoted from a smaller type, insert an |
1486 | // assertion to capture this. |
1487 | if (VA.getLocInfo() == CCValAssign::SExt) |
1488 | Value = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: VA.getLocVT(), N1: Value, |
1489 | N2: DAG.getValueType(VA.getValVT())); |
1490 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
1491 | Value = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: VA.getLocVT(), N1: Value, |
1492 | N2: DAG.getValueType(VA.getValVT())); |
1493 | |
1494 | if (VA.isExtInLoc()) |
1495 | Value = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: Value); |
1496 | else if (VA.getLocInfo() == CCValAssign::BCvt) { |
1497 | // If this is a short vector argument loaded from the stack, |
1498 | // extend from i64 to full vector size and then bitcast. |
1499 | assert(VA.getLocVT() == MVT::i64); |
1500 | assert(VA.getValVT().isVector()); |
1501 | Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)}); |
1502 | Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VA.getValVT(), Operand: Value); |
1503 | } else |
1504 | assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo" ); |
1505 | return Value; |
1506 | } |
1507 | |
1508 | // Value is a value of type VA.getValVT() that we need to copy into |
1509 | // the location described by VA. Return a copy of Value converted to |
1510 | // VA.getValVT(). The caller is responsible for handling indirect values. |
1511 | static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL, |
1512 | CCValAssign &VA, SDValue Value) { |
1513 | switch (VA.getLocInfo()) { |
1514 | case CCValAssign::SExt: |
1515 | return DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1516 | case CCValAssign::ZExt: |
1517 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1518 | case CCValAssign::AExt: |
1519 | return DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: VA.getLocVT(), Operand: Value); |
1520 | case CCValAssign::BCvt: { |
1521 | assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128); |
1522 | assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 || |
1523 | VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128); |
1524 | // For an f32 vararg we need to first promote it to an f64 and then |
1525 | // bitcast it to an i64. |
1526 | if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64) |
1527 | Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value); |
1528 | MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64 |
1529 | ? MVT::v2i64 |
1530 | : VA.getLocVT(); |
1531 | Value = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: BitCastToType, Operand: Value); |
1532 | // For ELF, this is a short vector argument to be stored to the stack, |
1533 | // bitcast to v2i64 and then extract first element. |
1534 | if (BitCastToType == MVT::v2i64) |
1535 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, |
1536 | DAG.getConstant(0, DL, MVT::i32)); |
1537 | return Value; |
1538 | } |
1539 | case CCValAssign::Full: |
1540 | return Value; |
1541 | default: |
1542 | llvm_unreachable("Unhandled getLocInfo()" ); |
1543 | } |
1544 | } |
1545 | |
1546 | static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) { |
1547 | SDLoc DL(In); |
1548 | SDValue Lo, Hi; |
1549 | if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { |
1550 | Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In); |
1551 | Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, |
1552 | DAG.getNode(ISD::SRL, DL, MVT::i128, In, |
1553 | DAG.getConstant(64, DL, MVT::i32))); |
1554 | } else { |
1555 | std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64); |
1556 | } |
1557 | |
1558 | SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, |
1559 | MVT::Untyped, Hi, Lo); |
1560 | return SDValue(Pair, 0); |
1561 | } |
1562 | |
1563 | static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { |
1564 | SDLoc DL(In); |
1565 | SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, |
1566 | DL, MVT::i64, In); |
1567 | SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, |
1568 | DL, MVT::i64, In); |
1569 | |
1570 | if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { |
1571 | Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); |
1572 | Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); |
1573 | Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, |
1574 | DAG.getConstant(64, DL, MVT::i32)); |
1575 | return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); |
1576 | } else { |
1577 | return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); |
1578 | } |
1579 | } |
1580 | |
1581 | bool SystemZTargetLowering::splitValueIntoRegisterParts( |
1582 | SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, |
1583 | unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { |
1584 | EVT ValueVT = Val.getValueType(); |
1585 | if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { |
1586 | // Inline assembly operand. |
1587 | Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); |
1588 | return true; |
1589 | } |
1590 | |
1591 | return false; |
1592 | } |
1593 | |
1594 | SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( |
1595 | SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, |
1596 | MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { |
1597 | if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { |
1598 | // Inline assembly operand. |
1599 | SDValue Res = lowerGR128ToI128(DAG, In: Parts[0]); |
1600 | return DAG.getBitcast(VT: ValueVT, V: Res); |
1601 | } |
1602 | |
1603 | return SDValue(); |
1604 | } |
1605 | |
// Lower the incoming formal arguments described by Ins, appending one
// SDValue per argument part to InVals.  Register arguments are copied out
// of live-in virtual registers; stack arguments are loaded from fixed
// frame objects.  Also records vararg save-area information and, on
// XPLINK64, wires up the incoming ADA register.
SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  // NOTE(review): the ELF frame lowering is fetched unconditionally, but it
  // is only used on the ELF-specific paths below — confirm on XPLINK64.
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
  FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());

  // Counts of fixed (non-vararg) argument registers used, recorded for
  // va_start below.
  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type" );
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        // f128 occupies a register pair.
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      // Mark the physical register live-in and copy it into a vreg.
      Register VReg = MRI.createVirtualRegister(RegClass: RC);
      MRI.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
      ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory" );

      // Create the frame index object for this incoming parameter.
      // FIXME: Pre-include call frame size in the offset, should not
      // need to manually add it here.
      int64_t ArgSPOffset = VA.getLocMemOffset();
      if (Subtarget.isTargetXPLINK64()) {
        auto &XPRegs =
            Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
        ArgSPOffset += XPRegs.getCallFrameSize();
      }
      int FI =
          MFI.CreateFixedObject(Size: LocVT.getSizeInBits() / 8, SPOffset: ArgSPOffset, IsImmutable: true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter. Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, VT: PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FIN,
                          N2: DAG.getIntPtrConstant(Val: 4, DL));
      ArgValue = DAG.getLoad(VT: LocVT, dl: DL, Chain, Ptr: FIN,
                             PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(Elt: DAG.getLoad(VT: VA.getValVT(), dl: DL, Chain, Ptr: ArgValue,
                                   PtrInfo: MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert (Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: ArgValue,
                                      N2: DAG.getIntPtrConstant(Val: PartOffset, DL));
        InVals.push_back(Elt: DAG.getLoad(VT: PartVA.getValVT(), dl: DL, Chain, Ptr: Address,
                                     PtrInfo: MachinePointerInfo()));
        // Consume the extra part locations belonging to this argument.
        ++I;
      }
    } else
      InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: ArgValue));
  }

  if (IsVarArg && Subtarget.isTargetXPLINK64()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    // FIXME: Pre-include call frame size in the offset, should not
    // need to manually add it here.
    int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
    int FI = MFI.CreateFixedObject(Size: 1, SPOffset: VarArgOffset, IsImmutable: true);
    FuncInfo->setVarArgsFrameIndex(FI);
  }

  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be.  The 1-byte size here is arbitrary.
    int64_t VarArgsOffset = CCInfo.getStackSize();
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(Size: 1, SPOffset: VarArgsOffset, IsImmutable: true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset =
      -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
    unsigned RegSaveIndex = MFI.CreateFixedObject(Size: 1, SPOffset: RegSaveOffset, IsImmutable: true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots.  (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
      SDValue MemOps[SystemZ::ELFNumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(MF, Reg: SystemZ::ELFArgFPRs[I]);
        int FI =
          MFI.CreateFixedObject(Size: 8, SPOffset: -SystemZMC::ELFCallFrameSize + Offset, IsImmutable: true);
        SDValue FIN = DAG.getFrameIndex(FI, VT: getPointerTy(DL: DAG.getDataLayout()));
        Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(Chain: ArgValue.getValue(R: 1), dl: DL, Val: ArgValue, Ptr: FIN,
                                 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          ArrayRef(&MemOps[NumFixedFPRs],
                                   SystemZ::ELFNumArgFPRs - NumFixedFPRs));
    }
  }

  if (Subtarget.isTargetXPLINK64()) {
    // Create virual register for handling incoming "ADA" special register (R5)
    const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
    Register ADAvReg = MRI.createVirtualRegister(RegClass: RC);
    auto *Regs = static_cast<SystemZXPLINK64Registers *>(
        Subtarget.getSpecialRegisters());
    MRI.addLiveIn(Reg: Regs->getADARegister(), vreg: ADAvReg);
    FuncInfo->setADAVirtualRegister(ADAvReg);
  }
  return Chain;
}
1786 | |
1787 | static bool canUseSiblingCall(const CCState &ArgCCInfo, |
1788 | SmallVectorImpl<CCValAssign> &ArgLocs, |
1789 | SmallVectorImpl<ISD::OutputArg> &Outs) { |
1790 | // Punt if there are any indirect or stack arguments, or if the call |
1791 | // needs the callee-saved argument register R6, or if the call uses |
1792 | // the callee-saved register arguments SwiftSelf and SwiftError. |
1793 | for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { |
1794 | CCValAssign &VA = ArgLocs[I]; |
1795 | if (VA.getLocInfo() == CCValAssign::Indirect) |
1796 | return false; |
1797 | if (!VA.isRegLoc()) |
1798 | return false; |
1799 | Register Reg = VA.getLocReg(); |
1800 | if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) |
1801 | return false; |
1802 | if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) |
1803 | return false; |
1804 | } |
1805 | return true; |
1806 | } |
1807 | |
1808 | static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, |
1809 | unsigned Offset, bool LoadAdr = false) { |
1810 | MachineFunction &MF = DAG.getMachineFunction(); |
1811 | SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); |
1812 | unsigned ADAvReg = MFI->getADAVirtualRegister(); |
1813 | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout()); |
1814 | |
1815 | SDValue Reg = DAG.getRegister(Reg: ADAvReg, VT: PtrVT); |
1816 | SDValue Ofs = DAG.getTargetConstant(Val: Offset, DL, VT: PtrVT); |
1817 | |
1818 | SDValue Result = DAG.getNode(Opcode: SystemZISD::ADA_ENTRY, DL, VT: PtrVT, N1: Val, N2: Reg, N3: Ofs); |
1819 | if (!LoadAdr) |
1820 | Result = DAG.getLoad( |
1821 | VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result, PtrInfo: MachinePointerInfo(), Alignment: Align(8), |
1822 | MMOFlags: MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); |
1823 | |
1824 | return Result; |
1825 | } |
1826 | |
1827 | // ADA access using Global value |
1828 | // Note: for functions, address of descriptor is returned |
1829 | static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, |
1830 | EVT PtrVT) { |
1831 | unsigned ADAtype; |
1832 | bool LoadAddr = false; |
1833 | const GlobalAlias *GA = dyn_cast<GlobalAlias>(Val: GV); |
1834 | bool IsFunction = |
1835 | (isa<Function>(Val: GV)) || (GA && isa<Function>(Val: GA->getAliaseeObject())); |
1836 | bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); |
1837 | |
1838 | if (IsFunction) { |
1839 | if (IsInternal) { |
1840 | ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; |
1841 | LoadAddr = true; |
1842 | } else |
1843 | ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; |
1844 | } else { |
1845 | ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; |
1846 | } |
1847 | SDValue Val = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: ADAtype); |
1848 | |
1849 | return getADAEntry(DAG, Val, DL, Offset: 0, LoadAdr: LoadAddr); |
1850 | } |
1851 | |
// Resolve the callee entry point and ADA value for an XPLINK64 call.
// On return, Callee holds the address to branch to and ADA the associated
// environment value.  Returns true when the callee is an internal-linkage
// global reachable with a direct (BRASL) call using the caller's own ADA;
// returns false when callee and ADA had to be loaded from a function
// descriptor (external symbol or function pointer).
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
                               SDLoc &DL, SDValue &Chain) {
  unsigned ADADelta = 0; // ADA offset in desc.
  unsigned EPADelta = 8; // EPA offset in desc.
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DL: DAG.getDataLayout());

  // XPLink calling convention.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
    bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
                       G->getGlobal()->hasPrivateLinkage());
    if (IsInternal) {
      // Internal function: it shares the caller's ADA and can be reached
      // with a PC-relative address.
      SystemZMachineFunctionInfo *MFI =
          MF.getInfo<SystemZMachineFunctionInfo>();
      unsigned ADAvReg = MFI->getADAVirtualRegister();
      ADA = DAG.getCopyFromReg(Chain, dl: DL, Reg: ADAvReg, VT: PtrVT);
      Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT);
      Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
      return true;
    } else {
      // External global: load ADA and entry point out of its descriptor.
      SDValue GA = DAG.getTargetGlobalAddress(
          GV: G->getGlobal(), DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC);
      ADA = getADAEntry(DAG, Val: GA, DL, Offset: ADADelta);
      Callee = getADAEntry(DAG, Val: GA, DL, Offset: EPADelta);
    }
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
    // External symbol: same descriptor-based lookup as external globals.
    SDValue ES = DAG.getTargetExternalSymbol(
        Sym: E->getSymbol(), VT: PtrVT, TargetFlags: SystemZII::MO_ADA_DIRECT_FUNC_DESC);
    ADA = getADAEntry(DAG, Val: ES, DL, Offset: ADADelta);
    Callee = getADAEntry(DAG, Val: ES, DL, Offset: EPADelta);
  } else {
    // Function pointer case
    // The pointer addresses a descriptor: load ADA and EPA at their
    // fixed offsets within it.
    ADA = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee,
                      N2: DAG.getConstant(Val: ADADelta, DL, VT: PtrVT));
    ADA = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: ADA,
                      PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
    Callee = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Callee,
                         N2: DAG.getConstant(Val: EPADelta, DL, VT: PtrVT));
    Callee = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Callee,
                         PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
  }
  return false;
}
1895 | |
// Lower an outgoing call: assign argument locations, materialize register
// and stack copies, emit the (sib)call node, and copy out the results into
// InVals.  Returns the updated chain (or the SIBCALL node for tail calls).
SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(DL: MF.getDataLayout());
  LLVMContext &Ctx = *DAG.getContext();
  SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();

  // FIXME: z/OS support to be added in later.
  if (Subtarget.isTargetXPLINK64())
    IsTailCall = false;

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getStackSize();

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      EVT SlotVT;
      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        // Allocate the full stack space for a promoted (and split) argument.
        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
        EVT OrigArgVT = getValueType(DL: MF.getDataLayout(), Ty: OrigArgType);
        MVT PartVT = getRegisterTypeForCallingConv(Context&: Ctx, CC: CLI.CallConv, VT: OrigArgVT);
        unsigned N = getNumRegistersForCallingConv(Context&: Ctx, CC: CLI.CallConv, VT: OrigArgVT);
        SlotVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: PartVT.getSizeInBits() * N);
      } else {
        SlotVT = Outs[I].VT;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(VT: SlotVT);
      int FI = cast<FrameIndexSDNode>(Val&: SpillSlot)->getIndex();
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: SpillSlot,
                        PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      assert (Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: SpillSlot,
                                      N2: DAG.getIntPtrConstant(Val: PartOffset, DL));
        MemOpChains.push_back(
            Elt: DAG.getStore(Chain, dl: DL, Val: PartValue, Ptr: Address,
                          PtrInfo: MachinePointerInfo::getFixedStack(MF, FI)));
        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                SlotVT.getStoreSize()) && "Not enough space for argument part!" );
        // Consume the extra part locations belonging to this argument.
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, Value: ArgValue);

    if (VA.isRegLoc()) {
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
      // MVT::i128 type. We decompose the 128-bit type to a pair of its high
      // and low values.
      if (VA.getLocVT() == MVT::i128)
        ArgValue = lowerI128ToGR128(DAG, In: ArgValue);
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory" );

      // Work out the address of the stack slot.  Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl: DL,
                                      Reg: Regs->getStackPointerRegister(), VT: PtrVT);
      unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
                        VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: StackPtr,
                                    N2: DAG.getIntPtrConstant(Val: Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          Elt: DAG.getStore(Chain, dl: DL, Val: ArgValue, Ptr: Address, PtrInfo: MachinePointerInfo()));

      // Although long doubles or vectors are passed through the stack when
      // they are vararg (non-fixed arguments), if a long double or vector
      // occupies the third and fourth slot of the argument list GPR3 should
      // still shadow the third slot of the argument list.
      if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
        SDValue ShadowArgValue =
            DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
                        DAG.getIntPtrConstant(1, DL));
        RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
      }
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes.  Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;

  if (Subtarget.isTargetXPLINK64()) {
    // z/OS: resolve callee/ADA; indirect callees go through the
    // designated callee-address register.
    SDValue ADA;
    bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
    if (!IsBRASL) {
      unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
                               ->getAddressOfCalleeRegister();
      Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: CalleeReg, N: Callee, Glue);
      Glue = Chain.getValue(R: 1);
      Callee = DAG.getRegister(Reg: CalleeReg, VT: Callee.getValueType());
    }
    RegsToPass.push_back(Elt: std::make_pair(
        x: static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), y&: ADA));
  } else {
    if (auto *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
      Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL, VT: PtrVT);
      Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
    } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
      Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT);
      Callee = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Callee);
    } else if (IsTailCall) {
      Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
      Glue = Chain.getValue(R: 1);
      Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
    }
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: RegsToPass[I].first,
                             N: RegsToPass[I].second, Glue);
    Glue = Chain.getValue(R: 1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Elt: Chain);
  Ops.push_back(Elt: Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(Elt: DAG.getRegister(Reg: RegsToPass[I].first,
                                  VT: RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention" );
  Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Elt: Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall) {
    // Sibling call: the SIBCALL node terminates the sequence — no
    // CALLSEQ_END and no result copies are needed.
    SDValue Ret = DAG.getNode(Opcode: SystemZISD::SIBCALL, DL, VTList: NodeTys, Ops);
    DAG.addNoMergeSiteInfo(Node: Ret.getNode(), NoMerge: CLI.NoMerge);
    return Ret;
  }
  Chain = DAG.getNode(Opcode: SystemZISD::CALL, DL, VTList: NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
  Glue = Chain.getValue(R: 1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue, DL);
  Glue = Chain.getValue(R: 1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VA.getLocReg(),
                                          VT: VA.getLocVT(), Glue);
    Chain = RetValue.getValue(R: 1);
    Glue = RetValue.getValue(R: 2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(Elt: convertLocVTToValVT(DAG, DL, VA, Chain, Value: RetValue));
  }

  return Chain;
}
2121 | |
2122 | // Generate a call taking the given operands as arguments and returning a |
2123 | // result of type RetVT. |
2124 | std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( |
2125 | SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, |
2126 | ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, |
2127 | bool DoesNotReturn, bool IsReturnValueUsed) const { |
2128 | TargetLowering::ArgListTy Args; |
2129 | Args.reserve(n: Ops.size()); |
2130 | |
2131 | TargetLowering::ArgListEntry Entry; |
2132 | for (SDValue Op : Ops) { |
2133 | Entry.Node = Op; |
2134 | Entry.Ty = Entry.Node.getValueType().getTypeForEVT(Context&: *DAG.getContext()); |
2135 | Entry.IsSExt = shouldSignExtendTypeInLibCall(Type: Op.getValueType(), IsSigned); |
2136 | Entry.IsZExt = !shouldSignExtendTypeInLibCall(Type: Op.getValueType(), IsSigned); |
2137 | Args.push_back(x: Entry); |
2138 | } |
2139 | |
2140 | SDValue Callee = |
2141 | DAG.getExternalSymbol(Sym: CalleeName, VT: getPointerTy(DL: DAG.getDataLayout())); |
2142 | |
2143 | Type *RetTy = RetVT.getTypeForEVT(Context&: *DAG.getContext()); |
2144 | TargetLowering::CallLoweringInfo CLI(DAG); |
2145 | bool SignExtend = shouldSignExtendTypeInLibCall(Type: RetVT, IsSigned); |
2146 | CLI.setDebugLoc(DL) |
2147 | .setChain(Chain) |
2148 | .setCallee(CC: CallConv, ResultType: RetTy, Target: Callee, ArgsList: std::move(Args)) |
2149 | .setNoReturn(DoesNotReturn) |
2150 | .setDiscardResult(!IsReturnValueUsed) |
2151 | .setSExtResult(SignExtend) |
2152 | .setZExtResult(!SignExtend); |
2153 | return LowerCallTo(CLI); |
2154 | } |
2155 | |
2156 | bool SystemZTargetLowering:: |
2157 | CanLowerReturn(CallingConv::ID CallConv, |
2158 | MachineFunction &MF, bool isVarArg, |
2159 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2160 | LLVMContext &Context) const { |
2161 | // Special case that we cannot easily detect in RetCC_SystemZ since |
2162 | // i128 may not be a legal type. |
2163 | for (auto &Out : Outs) |
2164 | if (Out.ArgVT == MVT::i128) |
2165 | return false; |
2166 | |
2167 | SmallVector<CCValAssign, 16> RetLocs; |
2168 | CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); |
2169 | return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); |
2170 | } |
2171 | |
// Lower an IR-level return into a SystemZISD::RET_GLUE node, copying each
// returned value into its assigned physical register first.
SDValue
SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &DL, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Assign locations to each returned value.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);

  // Quick exit for void returns
  if (RetLocs.empty())
    return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);

  if (CallConv == CallingConv::GHC)
    report_fatal_error(reason: "GHC functions return void only" );

  // Copy the result values into the output registers.
  // Each copy is glued to the previous one so that the copies and the
  // return node stay together as one unit.
  SDValue Glue;
  SmallVector<SDValue, 4> RetOps;
  RetOps.push_back(Elt: Chain);
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];
    SDValue RetValue = OutVals[I];

    // Make the return register live on exit.
    assert(VA.isRegLoc() && "Can only return in registers!" );

    // Promote the value as required.
    RetValue = convertValVTToLocVT(DAG, DL, VA, Value: RetValue);

    // Chain and glue the copies together.
    Register Reg = VA.getLocReg();
    Chain = DAG.getCopyToReg(Chain, dl: DL, Reg, N: RetValue, Glue);
    Glue = Chain.getValue(R: 1);
    // Record the register operand so it is marked live-out on the return.
    RetOps.push_back(Elt: DAG.getRegister(Reg, VT: VA.getLocVT()));
  }

  // Update chain and glue.
  RetOps[0] = Chain;
  if (Glue.getNode())
    RetOps.push_back(Elt: Glue);

  return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
}
2220 | |
2221 | // Return true if Op is an intrinsic node with chain that returns the CC value |
2222 | // as its only (other) argument. Provide the associated SystemZISD opcode and |
2223 | // the mask of valid CC values if so. |
2224 | static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, |
2225 | unsigned &CCValid) { |
2226 | unsigned Id = Op.getConstantOperandVal(i: 1); |
2227 | switch (Id) { |
2228 | case Intrinsic::s390_tbegin: |
2229 | Opcode = SystemZISD::TBEGIN; |
2230 | CCValid = SystemZ::CCMASK_TBEGIN; |
2231 | return true; |
2232 | |
2233 | case Intrinsic::s390_tbegin_nofloat: |
2234 | Opcode = SystemZISD::TBEGIN_NOFLOAT; |
2235 | CCValid = SystemZ::CCMASK_TBEGIN; |
2236 | return true; |
2237 | |
2238 | case Intrinsic::s390_tend: |
2239 | Opcode = SystemZISD::TEND; |
2240 | CCValid = SystemZ::CCMASK_TEND; |
2241 | return true; |
2242 | |
2243 | default: |
2244 | return false; |
2245 | } |
2246 | } |
2247 | |
// Return true if Op is an intrinsic node without chain that returns the
// CC value as its final argument. Provide the associated SystemZISD
// opcode and the mask of valid CC values if so.
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
  // No chain, so the intrinsic ID is operand 0.
  unsigned Id = Op.getConstantOperandVal(i: 0);
  switch (Id) {
  // Vector pack (saturating) variants, one per element width.
  case Intrinsic::s390_vpkshs:
  case Intrinsic::s390_vpksfs:
  case Intrinsic::s390_vpksgs:
    Opcode = SystemZISD::PACKS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vpklshs:
  case Intrinsic::s390_vpklsfs:
  case Intrinsic::s390_vpklsgs:
    Opcode = SystemZISD::PACKLS_CC;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  // Vector integer comparisons: equal, signed high, logical (unsigned) high.
  case Intrinsic::s390_vceqbs:
  case Intrinsic::s390_vceqhs:
  case Intrinsic::s390_vceqfs:
  case Intrinsic::s390_vceqgs:
    Opcode = SystemZISD::VICMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchbs:
  case Intrinsic::s390_vchhs:
  case Intrinsic::s390_vchfs:
  case Intrinsic::s390_vchgs:
    Opcode = SystemZISD::VICMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vchlbs:
  case Intrinsic::s390_vchlhs:
  case Intrinsic::s390_vchlfs:
  case Intrinsic::s390_vchlgs:
    Opcode = SystemZISD::VICMPHLS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vtm:
    Opcode = SystemZISD::VTM;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  // Vector string/search operations; these can produce any CC value.
  case Intrinsic::s390_vfaebs:
  case Intrinsic::s390_vfaehs:
  case Intrinsic::s390_vfaefs:
    Opcode = SystemZISD::VFAE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfaezbs:
  case Intrinsic::s390_vfaezhs:
  case Intrinsic::s390_vfaezfs:
    Opcode = SystemZISD::VFAEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeebs:
  case Intrinsic::s390_vfeehs:
  case Intrinsic::s390_vfeefs:
    Opcode = SystemZISD::VFEE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfeezbs:
  case Intrinsic::s390_vfeezhs:
  case Intrinsic::s390_vfeezfs:
    Opcode = SystemZISD::VFEEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenebs:
  case Intrinsic::s390_vfenehs:
  case Intrinsic::s390_vfenefs:
    Opcode = SystemZISD::VFENE_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vfenezbs:
  case Intrinsic::s390_vfenezhs:
  case Intrinsic::s390_vfenezfs:
    Opcode = SystemZISD::VFENEZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  // VISTR only produces CC 0 or 3.
  case Intrinsic::s390_vistrbs:
  case Intrinsic::s390_vistrhs:
  case Intrinsic::s390_vistrfs:
    Opcode = SystemZISD::VISTR_CC;
    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
    return true;

  case Intrinsic::s390_vstrcbs:
  case Intrinsic::s390_vstrchs:
  case Intrinsic::s390_vstrcfs:
    Opcode = SystemZISD::VSTRC_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrczbs:
  case Intrinsic::s390_vstrczhs:
  case Intrinsic::s390_vstrczfs:
    Opcode = SystemZISD::VSTRCZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrsb:
  case Intrinsic::s390_vstrsh:
  case Intrinsic::s390_vstrsf:
    Opcode = SystemZISD::VSTRS_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  case Intrinsic::s390_vstrszb:
  case Intrinsic::s390_vstrszh:
  case Intrinsic::s390_vstrszf:
    Opcode = SystemZISD::VSTRSZ_CC;
    CCValid = SystemZ::CCMASK_ANY;
    return true;

  // Vector floating-point comparisons (double and single variants).
  case Intrinsic::s390_vfcedbs:
  case Intrinsic::s390_vfcesbs:
    Opcode = SystemZISD::VFCMPES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchdbs:
  case Intrinsic::s390_vfchsbs:
    Opcode = SystemZISD::VFCMPHS;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vfchedbs:
  case Intrinsic::s390_vfchesbs:
    Opcode = SystemZISD::VFCMPHES;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_vftcidb:
  case Intrinsic::s390_vftcisb:
    Opcode = SystemZISD::VFTCI;
    CCValid = SystemZ::CCMASK_VCMP;
    return true;

  case Intrinsic::s390_tdc:
    Opcode = SystemZISD::TDC;
    CCValid = SystemZ::CCMASK_TDC;
    return true;

  default:
    return false;
  }
}
2407 | |
2408 | // Emit an intrinsic with chain and an explicit CC register result. |
2409 | static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, |
2410 | unsigned Opcode) { |
2411 | // Copy all operands except the intrinsic ID. |
2412 | unsigned NumOps = Op.getNumOperands(); |
2413 | SmallVector<SDValue, 6> Ops; |
2414 | Ops.reserve(N: NumOps - 1); |
2415 | Ops.push_back(Elt: Op.getOperand(i: 0)); |
2416 | for (unsigned I = 2; I < NumOps; ++I) |
2417 | Ops.push_back(Elt: Op.getOperand(i: I)); |
2418 | |
2419 | assert(Op->getNumValues() == 2 && "Expected only CC result and chain" ); |
2420 | SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); |
2421 | SDValue Intr = DAG.getNode(Opcode, DL: SDLoc(Op), VTList: RawVTs, Ops); |
2422 | SDValue OldChain = SDValue(Op.getNode(), 1); |
2423 | SDValue NewChain = SDValue(Intr.getNode(), 1); |
2424 | DAG.ReplaceAllUsesOfValueWith(From: OldChain, To: NewChain); |
2425 | return Intr.getNode(); |
2426 | } |
2427 | |
2428 | // Emit an intrinsic with an explicit CC register result. |
2429 | static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, |
2430 | unsigned Opcode) { |
2431 | // Copy all operands except the intrinsic ID. |
2432 | unsigned NumOps = Op.getNumOperands(); |
2433 | SmallVector<SDValue, 6> Ops; |
2434 | Ops.reserve(N: NumOps - 1); |
2435 | for (unsigned I = 1; I < NumOps; ++I) |
2436 | Ops.push_back(Elt: Op.getOperand(i: I)); |
2437 | |
2438 | SDValue Intr = DAG.getNode(Opcode, DL: SDLoc(Op), VTList: Op->getVTList(), Ops); |
2439 | return Intr.getNode(); |
2440 | } |
2441 | |
2442 | // CC is a comparison that will be implemented using an integer or |
2443 | // floating-point comparison. Return the condition code mask for |
2444 | // a branch on true. In the integer case, CCMASK_CMP_UO is set for |
2445 | // unsigned comparisons and clear for signed ones. In the floating-point |
2446 | // case, CCMASK_CMP_UO has its normal mask meaning (unordered). |
2447 | static unsigned CCMaskForCondCode(ISD::CondCode CC) { |
2448 | #define CONV(X) \ |
2449 | case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ |
2450 | case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ |
2451 | case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X |
2452 | |
2453 | switch (CC) { |
2454 | default: |
2455 | llvm_unreachable("Invalid integer condition!" ); |
2456 | |
2457 | CONV(EQ); |
2458 | CONV(NE); |
2459 | CONV(GT); |
2460 | CONV(GE); |
2461 | CONV(LT); |
2462 | CONV(LE); |
2463 | |
2464 | case ISD::SETO: return SystemZ::CCMASK_CMP_O; |
2465 | case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; |
2466 | } |
2467 | #undef CONV |
2468 | } |
2469 | |
2470 | // If C can be converted to a comparison against zero, adjust the operands |
2471 | // as necessary. |
2472 | static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { |
2473 | if (C.ICmpType == SystemZICMP::UnsignedOnly) |
2474 | return; |
2475 | |
2476 | auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1.getNode()); |
2477 | if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64) |
2478 | return; |
2479 | |
2480 | int64_t Value = ConstOp1->getSExtValue(); |
2481 | if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) || |
2482 | (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) || |
2483 | (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) || |
2484 | (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) { |
2485 | C.CCMask ^= SystemZ::CCMASK_CMP_EQ; |
2486 | C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op1.getValueType()); |
2487 | } |
2488 | } |
2489 | |
2490 | // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI, |
2491 | // adjust the operands as necessary. |
2492 | static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, |
2493 | Comparison &C) { |
2494 | // For us to make any changes, it must a comparison between a single-use |
2495 | // load and a constant. |
2496 | if (!C.Op0.hasOneUse() || |
2497 | C.Op0.getOpcode() != ISD::LOAD || |
2498 | C.Op1.getOpcode() != ISD::Constant) |
2499 | return; |
2500 | |
2501 | // We must have an 8- or 16-bit load. |
2502 | auto *Load = cast<LoadSDNode>(Val&: C.Op0); |
2503 | unsigned NumBits = Load->getMemoryVT().getSizeInBits(); |
2504 | if ((NumBits != 8 && NumBits != 16) || |
2505 | NumBits != Load->getMemoryVT().getStoreSizeInBits()) |
2506 | return; |
2507 | |
2508 | // The load must be an extending one and the constant must be within the |
2509 | // range of the unextended value. |
2510 | auto *ConstOp1 = cast<ConstantSDNode>(Val&: C.Op1); |
2511 | if (!ConstOp1 || ConstOp1->getValueSizeInBits(ResNo: 0) > 64) |
2512 | return; |
2513 | uint64_t Value = ConstOp1->getZExtValue(); |
2514 | uint64_t Mask = (1 << NumBits) - 1; |
2515 | if (Load->getExtensionType() == ISD::SEXTLOAD) { |
2516 | // Make sure that ConstOp1 is in range of C.Op0. |
2517 | int64_t SignedValue = ConstOp1->getSExtValue(); |
2518 | if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask) |
2519 | return; |
2520 | if (C.ICmpType != SystemZICMP::SignedOnly) { |
2521 | // Unsigned comparison between two sign-extended values is equivalent |
2522 | // to unsigned comparison between two zero-extended values. |
2523 | Value &= Mask; |
2524 | } else if (NumBits == 8) { |
2525 | // Try to treat the comparison as unsigned, so that we can use CLI. |
2526 | // Adjust CCMask and Value as necessary. |
2527 | if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT) |
2528 | // Test whether the high bit of the byte is set. |
2529 | Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT; |
2530 | else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE) |
2531 | // Test whether the high bit of the byte is clear. |
2532 | Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT; |
2533 | else |
2534 | // No instruction exists for this combination. |
2535 | return; |
2536 | C.ICmpType = SystemZICMP::UnsignedOnly; |
2537 | } |
2538 | } else if (Load->getExtensionType() == ISD::ZEXTLOAD) { |
2539 | if (Value > Mask) |
2540 | return; |
2541 | // If the constant is in range, we can use any comparison. |
2542 | C.ICmpType = SystemZICMP::Any; |
2543 | } else |
2544 | return; |
2545 | |
2546 | // Make sure that the first operand is an i32 of the right extension type. |
2547 | ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ? |
2548 | ISD::SEXTLOAD : |
2549 | ISD::ZEXTLOAD); |
2550 | if (C.Op0.getValueType() != MVT::i32 || |
2551 | Load->getExtensionType() != ExtType) { |
2552 | C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), |
2553 | Load->getBasePtr(), Load->getPointerInfo(), |
2554 | Load->getMemoryVT(), Load->getAlign(), |
2555 | Load->getMemOperand()->getFlags()); |
2556 | // Update the chain uses. |
2557 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(Load, 1), To: C.Op0.getValue(R: 1)); |
2558 | } |
2559 | |
2560 | // Make sure that the second operand is an i32 with the right value. |
2561 | if (C.Op1.getValueType() != MVT::i32 || |
2562 | Value != ConstOp1->getZExtValue()) |
2563 | C.Op1 = DAG.getConstant(Value, DL, MVT::i32); |
2564 | } |
2565 | |
2566 | // Return true if Op is either an unextended load, or a load suitable |
2567 | // for integer register-memory comparisons of type ICmpType. |
2568 | static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { |
2569 | auto *Load = dyn_cast<LoadSDNode>(Val: Op.getNode()); |
2570 | if (Load) { |
2571 | // There are no instructions to compare a register with a memory byte. |
2572 | if (Load->getMemoryVT() == MVT::i8) |
2573 | return false; |
2574 | // Otherwise decide on extension type. |
2575 | switch (Load->getExtensionType()) { |
2576 | case ISD::NON_EXTLOAD: |
2577 | return true; |
2578 | case ISD::SEXTLOAD: |
2579 | return ICmpType != SystemZICMP::UnsignedOnly; |
2580 | case ISD::ZEXTLOAD: |
2581 | return ICmpType != SystemZICMP::SignedOnly; |
2582 | default: |
2583 | break; |
2584 | } |
2585 | } |
2586 | return false; |
2587 | } |
2588 | |
// Return true if it is better to swap the operands of C.
// The checks run in priority order: reasons to keep the current order
// come first, then cases where a swap enables a better instruction.
static bool shouldSwapCmpOperands(const Comparison &C) {
  // Leave i128 and f128 comparisons alone, since they have no memory forms.
  if (C.Op0.getValueType() == MVT::i128)
    return false;
  if (C.Op0.getValueType() == MVT::f128)
    return false;

  // Always keep a floating-point constant second, since comparisons with
  // zero can use LOAD TEST and comparisons with other constants make a
  // natural memory operand.
  if (isa<ConstantFPSDNode>(Val: C.Op1))
    return false;

  // Never swap comparisons with zero since there are many ways to optimize
  // those later.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val: C.Op1);
  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
    return false;

  // Also keep natural memory operands second if the loaded value is
  // only used here.  Several comparisons have memory forms.
  if (isNaturalMemoryOperand(Op: C.Op1, ICmpType: C.ICmpType) && C.Op1.hasOneUse())
    return false;

  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
  // In that case we generally prefer the memory to be second.
  if (isNaturalMemoryOperand(Op: C.Op0, ICmpType: C.ICmpType) && C.Op0.hasOneUse()) {
    // The only exceptions are when the second operand is a constant and
    // we can use things like CHHSI.
    if (!ConstOp1)
      return true;
    // The unsigned memory-immediate instructions can handle 16-bit
    // unsigned integers.
    if (C.ICmpType != SystemZICMP::SignedOnly &&
        isUInt<16>(x: ConstOp1->getZExtValue()))
      return false;
    // The signed memory-immediate instructions can handle 16-bit
    // signed integers.
    if (C.ICmpType != SystemZICMP::UnsignedOnly &&
        isInt<16>(x: ConstOp1->getSExtValue()))
      return false;
    // Memory operand with a constant that fits no immediate form:
    // still better to have the memory second.
    return true;
  }

  // Try to promote the use of CGFR and CLGFR.  Swapping puts an extension
  // of the appropriate signedness on the second operand; an AND with
  // 0xffffffff acts as a zero extension here.
  unsigned Opcode0 = C.Op0.getOpcode();
  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
    return true;
  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
      C.Op0.getOperand(i: 1).getOpcode() == ISD::Constant &&
      C.Op0.getConstantOperandVal(i: 1) == 0xffffffff)
    return true;

  return false;
}
2647 | |
2648 | // Check whether C tests for equality between X and Y and whether X - Y |
2649 | // or Y - X is also computed. In that case it's better to compare the |
2650 | // result of the subtraction against zero. |
2651 | static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, |
2652 | Comparison &C) { |
2653 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
2654 | C.CCMask == SystemZ::CCMASK_CMP_NE) { |
2655 | for (SDNode *N : C.Op0->uses()) { |
2656 | if (N->getOpcode() == ISD::SUB && |
2657 | ((N->getOperand(Num: 0) == C.Op0 && N->getOperand(Num: 1) == C.Op1) || |
2658 | (N->getOperand(Num: 0) == C.Op1 && N->getOperand(Num: 1) == C.Op0))) { |
2659 | // Disable the nsw and nuw flags: the backend needs to handle |
2660 | // overflow as well during comparison elimination. |
2661 | SDNodeFlags Flags = N->getFlags(); |
2662 | Flags.setNoSignedWrap(false); |
2663 | Flags.setNoUnsignedWrap(false); |
2664 | N->setFlags(Flags); |
2665 | C.Op0 = SDValue(N, 0); |
2666 | C.Op1 = DAG.getConstant(Val: 0, DL, VT: N->getValueType(ResNo: 0)); |
2667 | return; |
2668 | } |
2669 | } |
2670 | } |
2671 | } |
2672 | |
2673 | // Check whether C compares a floating-point value with zero and if that |
2674 | // floating-point value is also negated. In this case we can use the |
2675 | // negation to set CC, so avoiding separate LOAD AND TEST and |
2676 | // LOAD (NEGATIVE/COMPLEMENT) instructions. |
2677 | static void adjustForFNeg(Comparison &C) { |
2678 | // This optimization is invalid for strict comparisons, since FNEG |
2679 | // does not raise any exceptions. |
2680 | if (C.Chain) |
2681 | return; |
2682 | auto *C1 = dyn_cast<ConstantFPSDNode>(Val&: C.Op1); |
2683 | if (C1 && C1->isZero()) { |
2684 | for (SDNode *N : C.Op0->uses()) { |
2685 | if (N->getOpcode() == ISD::FNEG) { |
2686 | C.Op0 = SDValue(N, 0); |
2687 | C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask); |
2688 | return; |
2689 | } |
2690 | } |
2691 | } |
2692 | } |
2693 | |
2694 | // Check whether C compares (shl X, 32) with 0 and whether X is |
2695 | // also sign-extended. In that case it is better to test the result |
2696 | // of the sign extension using LTGFR. |
2697 | // |
2698 | // This case is important because InstCombine transforms a comparison |
2699 | // with (sext (trunc X)) into a comparison with (shl X, 32). |
2700 | static void adjustForLTGFR(Comparison &C) { |
2701 | // Check for a comparison between (shl X, 32) and 0. |
2702 | if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && |
2703 | C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { |
2704 | auto *C1 = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1)); |
2705 | if (C1 && C1->getZExtValue() == 32) { |
2706 | SDValue ShlOp0 = C.Op0.getOperand(i: 0); |
2707 | // See whether X has any SIGN_EXTEND_INREG uses. |
2708 | for (SDNode *N : ShlOp0->uses()) { |
2709 | if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && |
2710 | cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { |
2711 | C.Op0 = SDValue(N, 0); |
2712 | return; |
2713 | } |
2714 | } |
2715 | } |
2716 | } |
2717 | } |
2718 | |
// If C compares the truncation of an extending load, try to compare
// the untruncated value instead.  This exposes more opportunities to
// reuse CC.  Only comparisons against zero are handled, and only when
// the truncation does not drop any bits of the loaded value.
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
                               Comparison &C) {
  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
      C.Op0.getOperand(i: 0).getOpcode() == ISD::LOAD &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    auto *L = cast<LoadSDNode>(Val: C.Op0.getOperand(i: 0));
    // The in-memory value must fit in the truncated type, otherwise the
    // truncation could change the comparison result.
    if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
        C.Op0.getValueSizeInBits().getFixedValue()) {
      unsigned Type = L->getExtensionType();
      // The extension must match the signedness of the comparison.
      if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
          (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
        // Compare the untruncated load result against zero instead.
        C.Op0 = C.Op0.getOperand(i: 0);
        C.Op1 = DAG.getConstant(Val: 0, DL, VT: C.Op0.getValueType());
      }
    }
  }
}
2741 | |
2742 | // Return true if shift operation N has an in-range constant shift value. |
2743 | // Store it in ShiftVal if so. |
2744 | static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { |
2745 | auto *Shift = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1)); |
2746 | if (!Shift) |
2747 | return false; |
2748 | |
2749 | uint64_t Amount = Shift->getZExtValue(); |
2750 | if (Amount >= N.getValueSizeInBits()) |
2751 | return false; |
2752 | |
2753 | ShiftVal = Amount; |
2754 | return true; |
2755 | } |
2756 | |
// Check whether an AND with Mask is suitable for a TEST UNDER MASK
// instruction and whether the CC value is descriptive enough to handle
// a comparison of type Opcode between the AND result and CmpVal.
// CCMask says which comparison result is being tested and BitSize is
// the number of bits in the operands.  If TEST UNDER MASK can be used,
// return the corresponding CC mask, otherwise return 0.
//
// The checks below have overlapping value ranges; they are tried in
// order and the first applicable one wins.
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
                                     uint64_t Mask, uint64_t CmpVal,
                                     unsigned ICmpType) {
  assert(Mask != 0 && "ANDs with zero should have been removed by now" );

  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
  if (!SystemZ::isImmLL(Val: Mask) && !SystemZ::isImmLH(Val: Mask) &&
      !SystemZ::isImmHL(Val: Mask) && !SystemZ::isImmHH(Val: Mask))
    return 0;

  // Work out the masks for the lowest and highest bits.
  // High isolates the highest set bit of Mask, Low the lowest.
  uint64_t High = llvm::bit_floor(Value: Mask);
  uint64_t Low = uint64_t(1) << llvm::countr_zero(Val: Mask);

  // Signed ordered comparisons are effectively unsigned if the sign
  // bit is dropped.
  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);

  // Check for equality comparisons with 0, or the equivalent.
  if (CmpVal == 0) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  // Ordered comparisons that are equivalent to "all masked bits zero"
  // or its negation.
  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_SOME_1;
  }
  if (EffectivelyUnsigned && CmpVal < Low) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_ALL_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_SOME_1;
  }

  // Check for equality comparisons with the mask, or the equivalent.
  if (CmpVal == Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  // Ordered comparisons that are equivalent to "all masked bits one"
  // or its negation.
  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_SOME_0;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_ALL_1;
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_SOME_0;
  }

  // Check for ordered comparisons with the top bit.
  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
    if (CCMask == SystemZ::CCMASK_CMP_LE)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GT)
      return SystemZ::CCMASK_TM_MSB_1;
  }
  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
    if (CCMask == SystemZ::CCMASK_CMP_LT)
      return SystemZ::CCMASK_TM_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_GE)
      return SystemZ::CCMASK_TM_MSB_1;
  }

  // If there are just two bits, we can do equality checks for Low and High
  // as well.
  if (Mask == Low + High) {
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
      return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
    if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1;
    if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
      return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
  }

  // Looks like we've exhausted our options.
  return 0;
}
2851 | |
// See whether C can be implemented as a TEST UNDER MASK instruction.
// Update the arguments with the TM version if so.
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
                                   Comparison &C) {
  // Use VECTOR TEST UNDER MASK for i128 operations.
  if (C.Op0.getValueType() == MVT::i128) {
    // We can use VTM for EQ/NE comparisons of x & y against 0.
    if (C.Op0.getOpcode() == ISD::AND &&
        (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
         C.CCMask == SystemZ::CCMASK_CMP_NE)) {
      auto *Mask = dyn_cast<ConstantSDNode>(Val&: C.Op1);
      if (Mask && Mask->getAPIntValue() == 0) {
        // VTM operates on vector registers, so reinterpret both AND
        // operands as v16i8.
        C.Opcode = SystemZISD::VTM;
        C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
        C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
        C.CCValid = SystemZ::CCMASK_VCMP;
        if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
          C.CCMask = SystemZ::CCMASK_VCMP_ALL;
        else
          C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
      }
    }
    return;
  }

  // Check that we have a comparison with a constant.
  auto *ConstOp1 = dyn_cast<ConstantSDNode>(Val&: C.Op1);
  if (!ConstOp1)
    return;
  uint64_t CmpVal = ConstOp1->getZExtValue();

  // Check whether the nonconstant input is an AND with a constant mask.
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    // Test the AND's first operand directly against the mask (second
    // operand) below.
    NewC.Op0 = C.Op0.getOperand(i: 0);
    NewC.Op1 = C.Op0.getOperand(i: 1);
    Mask = dyn_cast<ConstantSDNode>(Val&: NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate
    // so use TMHH instead if possible. We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      // Flipping the EQ bit turns LE into LT and GT into GE.
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero than the low N bits of Op0 can
    // be masked off without changing the result.
    // (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal; negating
    // it produces a mask that keeps that bit and all higher bits.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type are suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  // A shift left of the input can be folded away by shifting the mask and
  // comparison value right instead, and vice versa for a shift right, as
  // long as no set bits are shifted out.
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(N: NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask,
                                        Mask: MaskVal >> ShiftVal,
                                        CmpVal: CmpVal >> ShiftVal,
                                        ICmpType: SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(i: 0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(N: NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask,
                                               Mask: MaskVal << ShiftVal,
                                               CmpVal: CmpVal << ShiftVal,
                                               ICmpType: SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(i: 0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, CCMask: NewC.CCMask, Mask: MaskVal, CmpVal,
                                     ICmpType: NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  // Reuse the existing mask node if it still holds the final mask value.
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(Val: MaskVal, DL, VT: C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}
2962 | |
2963 | // Implement i128 comparison in vector registers. |
2964 | static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, |
2965 | Comparison &C) { |
2966 | if (C.Opcode != SystemZISD::ICMP) |
2967 | return; |
2968 | if (C.Op0.getValueType() != MVT::i128) |
2969 | return; |
2970 | |
2971 | // (In-)Equality comparisons can be implemented via VCEQGS. |
2972 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ || |
2973 | C.CCMask == SystemZ::CCMASK_CMP_NE) { |
2974 | C.Opcode = SystemZISD::VICMPES; |
2975 | C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0); |
2976 | C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1); |
2977 | C.CCValid = SystemZ::CCMASK_VCMP; |
2978 | if (C.CCMask == SystemZ::CCMASK_CMP_EQ) |
2979 | C.CCMask = SystemZ::CCMASK_VCMP_ALL; |
2980 | else |
2981 | C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; |
2982 | return; |
2983 | } |
2984 | |
2985 | // Normalize other comparisons to GT. |
2986 | bool Swap = false, Invert = false; |
2987 | switch (C.CCMask) { |
2988 | case SystemZ::CCMASK_CMP_GT: break; |
2989 | case SystemZ::CCMASK_CMP_LT: Swap = true; break; |
2990 | case SystemZ::CCMASK_CMP_LE: Invert = true; break; |
2991 | case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; |
2992 | default: llvm_unreachable("Invalid integer condition!" ); |
2993 | } |
2994 | if (Swap) |
2995 | std::swap(a&: C.Op0, b&: C.Op1); |
2996 | |
2997 | if (C.ICmpType == SystemZICMP::UnsignedOnly) |
2998 | C.Opcode = SystemZISD::UCMP128HI; |
2999 | else |
3000 | C.Opcode = SystemZISD::SCMP128HI; |
3001 | C.CCValid = SystemZ::CCMASK_ANY; |
3002 | C.CCMask = SystemZ::CCMASK_1; |
3003 | |
3004 | if (Invert) |
3005 | C.CCMask ^= C.CCValid; |
3006 | } |
3007 | |
3008 | // See whether the comparison argument contains a redundant AND |
3009 | // and remove it if so. This sometimes happens due to the generic |
3010 | // BRCOND expansion. |
3011 | static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, |
3012 | Comparison &C) { |
3013 | if (C.Op0.getOpcode() != ISD::AND) |
3014 | return; |
3015 | auto *Mask = dyn_cast<ConstantSDNode>(Val: C.Op0.getOperand(i: 1)); |
3016 | if (!Mask || Mask->getValueSizeInBits(ResNo: 0) > 64) |
3017 | return; |
3018 | KnownBits Known = DAG.computeKnownBits(Op: C.Op0.getOperand(i: 0)); |
3019 | if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) |
3020 | return; |
3021 | |
3022 | C.Op0 = C.Op0.getOperand(i: 0); |
3023 | } |
3024 | |
// Return a Comparison that tests the condition-code result of intrinsic
// node Call against constant integer CC using comparison code Cond.
// Opcode is the opcode of the SystemZISD operation for the intrinsic
// and CCValid is the set of possible condition-code results.
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
                                  SDValue Call, unsigned CCValid, uint64_t CC,
                                  ISD::CondCode Cond) {
  // CC mask bits are numbered so that bit 3 represents CC value 0 and
  // bit 0 represents CC value 3.
  Comparison C(Call, SDValue(), SDValue());
  C.Opcode = Opcode;
  C.CCValid = CCValid;
  if (Cond == ISD::SETEQ)
    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
  else if (Cond == ISD::SETNE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
    // always true for CC>3.
    C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
    // ...and the inverse of that.
    C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
  else
    llvm_unreachable("Unexpected integer comparison type" );
  // Drop mask bits for CC values the intrinsic can never produce.
  C.CCMask &= CCValid;
  return C;
}
3060 | |
// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
                         ISD::CondCode Cond, const SDLoc &DL,
                         SDValue Chain = SDValue(),
                         bool IsSignaling = false) {
  // A comparison of an intrinsic's CC result against a constant is
  // handled specially via getIntrinsicCmp.
  if (CmpOp1.getOpcode() == ISD::Constant) {
    assert(!Chain);
    unsigned Opcode, CCValid;
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(NUses: 1, Value: 0) &&
        isIntrinsicWithCCAndChain(Op: CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid,
                             CC: CmpOp1->getAsZExtVal(), Cond);
    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
        isIntrinsicWithCC(Op: CmpOp0, Opcode, CCValid))
      return getIntrinsicCmp(DAG, Opcode, Call: CmpOp0, CCValid,
                             CC: CmpOp1->getAsZExtVal(), Cond);
  }
  Comparison C(CmpOp0, CmpOp1, Chain);
  C.CCMask = CCMaskForCondCode(CC: Cond);
  if (C.Op0.getValueType().isFloatingPoint()) {
    // Floating-point comparison: choose the plain, strict-quiet or
    // strict-signaling opcode depending on Chain/IsSignaling.
    C.CCValid = SystemZ::CCMASK_FCMP;
    if (!C.Chain)
      C.Opcode = SystemZISD::FCMP;
    else if (!IsSignaling)
      C.Opcode = SystemZISD::STRICT_FCMP;
    else
      C.Opcode = SystemZISD::STRICT_FCMPS;
    adjustForFNeg(C);
  } else {
    assert(!C.Chain);
    C.CCValid = SystemZ::CCMASK_ICMP;
    C.Opcode = SystemZISD::ICMP;
    // Choose the type of comparison. Equality and inequality tests can
    // use either signed or unsigned comparisons. The choice also doesn't
    // matter if both sign bits are known to be clear. In those cases we
    // want to give the main isel code the freedom to choose whichever
    // form fits best.
    if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
        C.CCMask == SystemZ::CCMASK_CMP_NE ||
        (DAG.SignBitIsZero(Op: C.Op0) && DAG.SignBitIsZero(Op: C.Op1)))
      C.ICmpType = SystemZICMP::Any;
    else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
      C.ICmpType = SystemZICMP::UnsignedOnly;
    else
      C.ICmpType = SystemZICMP::SignedOnly;
    C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
    // Apply a series of integer-comparison strength reductions.
    adjustForRedundantAnd(DAG, DL, C);
    adjustZeroCmp(DAG, DL, C);
    adjustSubwordCmp(DAG, DL, C);
    adjustForSubtraction(DAG, DL, C);
    adjustForLTGFR(C);
    adjustICmpTruncate(DAG, DL, C);
  }

  // If the operands should be presented the other way round, swap them
  // and reverse the CC mask to compensate.
  if (shouldSwapCmpOperands(C)) {
    std::swap(a&: C.Op0, b&: C.Op1);
    C.CCMask = SystemZ::reverseCCMask(CCMask: C.CCMask);
  }

  adjustForTestUnderMask(DAG, DL, C);
  adjustICmp128(DAG, DL, C);
  return C;
}
3126 | |
// Emit the comparison instruction described by C.
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
  // A null Op1 indicates an intrinsic CC comparison (see getIntrinsicCmp);
  // emit the intrinsic node itself and return its CC result.
  if (!C.Op1.getNode()) {
    SDNode *Node;
    switch (C.Op0.getOpcode()) {
    case ISD::INTRINSIC_W_CHAIN:
      Node = emitIntrinsicWithCCAndChain(DAG, Op: C.Op0, Opcode: C.Opcode);
      return SDValue(Node, 0);
    case ISD::INTRINSIC_WO_CHAIN:
      Node = emitIntrinsicWithCC(DAG, Op: C.Op0, Opcode: C.Opcode);
      return SDValue(Node, Node->getNumValues() - 1);
    default:
      llvm_unreachable("Invalid comparison operands" );
    }
  }
  if (C.Opcode == SystemZISD::ICMP)
    return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
  if (C.Opcode == SystemZISD::TM) {
    // RegisterOnly is set when exactly one of the two "mixed" CC values
    // is being tested.
    bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
                         bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
    return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
                       DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
  }
  if (C.Opcode == SystemZISD::VICMPES) {
    // VICMPES produces a compare vector (value 0) and a CC value
    // (value 1); only the CC value is needed here.
    SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
    SDValue Val = DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Op0, N2: C.Op1);
    return SDValue(Val.getNode(), 1);
  }
  if (C.Chain) {
    // Strict floating-point comparisons take and produce a chain.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    return DAG.getNode(Opcode: C.Opcode, DL, VTList: VTs, N1: C.Chain, N2: C.Op0, N3: C.Op1);
  }
  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
}
3162 | |
3163 | // Implement a 32-bit *MUL_LOHI operation by extending both operands to |
3164 | // 64 bits. Extend is the extension type to use. Store the high part |
3165 | // in Hi and the low part in Lo. |
3166 | static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, |
3167 | SDValue Op0, SDValue Op1, SDValue &Hi, |
3168 | SDValue &Lo) { |
3169 | Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); |
3170 | Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); |
3171 | SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); |
3172 | Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, |
3173 | DAG.getConstant(32, DL, MVT::i64)); |
3174 | Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); |
3175 | Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); |
3176 | } |
3177 | |
3178 | // Lower a binary operation that produces two VT results, one in each |
3179 | // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, |
3180 | // and Opcode performs the GR128 operation. Store the even register result |
3181 | // in Even and the odd register result in Odd. |
3182 | static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
3183 | unsigned Opcode, SDValue Op0, SDValue Op1, |
3184 | SDValue &Even, SDValue &Odd) { |
3185 | SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); |
3186 | bool Is32Bit = is32Bit(VT); |
3187 | Even = DAG.getTargetExtractSubreg(SRIdx: SystemZ::even128(Is32bit: Is32Bit), DL, VT, Operand: Result); |
3188 | Odd = DAG.getTargetExtractSubreg(SRIdx: SystemZ::odd128(Is32bit: Is32Bit), DL, VT, Operand: Result); |
3189 | } |
3190 | |
3191 | // Return an i32 value that is 1 if the CC value produced by CCReg is |
3192 | // in the mask CCMask and 0 otherwise. CC is known to have a value |
3193 | // in CCValid, so other values can be ignored. |
3194 | static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, |
3195 | unsigned CCValid, unsigned CCMask) { |
3196 | SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32), |
3197 | DAG.getConstant(0, DL, MVT::i32), |
3198 | DAG.getTargetConstant(CCValid, DL, MVT::i32), |
3199 | DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg}; |
3200 | return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops); |
3201 | } |
3202 | |
3203 | // Return the SystemISD vector comparison operation for CC, or 0 if it cannot |
3204 | // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP |
3205 | // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet) |
3206 | // floating-point comparisons, and CmpMode::SignalingFP for strict signaling |
3207 | // floating-point comparisons. |
3208 | enum class CmpMode { Int, FP, StrictFP, SignalingFP }; |
3209 | static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) { |
3210 | switch (CC) { |
3211 | case ISD::SETOEQ: |
3212 | case ISD::SETEQ: |
3213 | switch (Mode) { |
3214 | case CmpMode::Int: return SystemZISD::VICMPE; |
3215 | case CmpMode::FP: return SystemZISD::VFCMPE; |
3216 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE; |
3217 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES; |
3218 | } |
3219 | llvm_unreachable("Bad mode" ); |
3220 | |
3221 | case ISD::SETOGE: |
3222 | case ISD::SETGE: |
3223 | switch (Mode) { |
3224 | case CmpMode::Int: return 0; |
3225 | case CmpMode::FP: return SystemZISD::VFCMPHE; |
3226 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE; |
3227 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES; |
3228 | } |
3229 | llvm_unreachable("Bad mode" ); |
3230 | |
3231 | case ISD::SETOGT: |
3232 | case ISD::SETGT: |
3233 | switch (Mode) { |
3234 | case CmpMode::Int: return SystemZISD::VICMPH; |
3235 | case CmpMode::FP: return SystemZISD::VFCMPH; |
3236 | case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH; |
3237 | case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS; |
3238 | } |
3239 | llvm_unreachable("Bad mode" ); |
3240 | |
3241 | case ISD::SETUGT: |
3242 | switch (Mode) { |
3243 | case CmpMode::Int: return SystemZISD::VICMPHL; |
3244 | case CmpMode::FP: return 0; |
3245 | case CmpMode::StrictFP: return 0; |
3246 | case CmpMode::SignalingFP: return 0; |
3247 | } |
3248 | llvm_unreachable("Bad mode" ); |
3249 | |
3250 | default: |
3251 | return 0; |
3252 | } |
3253 | } |
3254 | |
3255 | // Return the SystemZISD vector comparison operation for CC or its inverse, |
3256 | // or 0 if neither can be done directly. Indicate in Invert whether the |
3257 | // result is for the inverse of CC. Mode is as above. |
3258 | static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, |
3259 | bool &Invert) { |
3260 | if (unsigned Opcode = getVectorComparison(CC, Mode)) { |
3261 | Invert = false; |
3262 | return Opcode; |
3263 | } |
3264 | |
3265 | CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32); |
3266 | if (unsigned Opcode = getVectorComparison(CC, Mode)) { |
3267 | Invert = true; |
3268 | return Opcode; |
3269 | } |
3270 | |
3271 | return 0; |
3272 | } |
3273 | |
3274 | // Return a v2f64 that contains the extended form of elements Start and Start+1 |
3275 | // of v4f32 value Op. If Chain is nonnull, return the strict form. |
3276 | static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, |
3277 | SDValue Op, SDValue Chain) { |
3278 | int Mask[] = { Start, -1, Start + 1, -1 }; |
3279 | Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); |
3280 | if (Chain) { |
3281 | SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); |
3282 | return DAG.getNode(Opcode: SystemZISD::STRICT_VEXTEND, DL, VTList: VTs, N1: Chain, N2: Op); |
3283 | } |
3284 | return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); |
3285 | } |
3286 | |
// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
// producing a result of type VT. If Chain is nonnull, return the strict form.
SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
                                            const SDLoc &DL, EVT VT,
                                            SDValue CmpOp0,
                                            SDValue CmpOp1,
                                            SDValue Chain) const {
  // There is no hardware support for v4f32 (unless we have the vector
  // enhancements facility 1), so extend the vector into two v2f64s
  // and compare those.
  if (CmpOp0.getValueType() == MVT::v4f32 &&
      !Subtarget.hasVectorEnhancements1()) {
    // High (elements 0/1) and low (elements 2/3) halves of each operand.
    SDValue H0 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp0, Chain);
    SDValue L0 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp0, Chain);
    SDValue H1 = expandV4F32ToV2F64(DAG, Start: 0, DL, Op: CmpOp1, Chain);
    SDValue L1 = expandV4F32ToV2F64(DAG, Start: 2, DL, Op: CmpOp1, Chain);
    if (Chain) {
      // Strict form: compare both halves, pack the two results, and
      // merge all intermediate chains into a single TokenFactor.
      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
      SDValue HRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: H0, N3: H1);
      SDValue LRes = DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: L0, N3: L1);
      SDValue Res = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes);
      SDValue Chains[6] = { H0.getValue(R: 1), L0.getValue(R: 1),
                            H1.getValue(R: 1), L1.getValue(R: 1),
                            HRes.getValue(R: 1), LRes.getValue(R: 1) };
      SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
      SDValue Ops[2] = { Res, NewChain };
      return DAG.getMergeValues(Ops, dl: DL);
    }
    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
    return DAG.getNode(Opcode: SystemZISD::PACK, DL, VT, N1: HRes, N2: LRes);
  }
  // Directly supported comparison: emit a single node, with or without
  // a chain.
  if (Chain) {
    SDVTList VTs = DAG.getVTList(VT, MVT::Other);
    return DAG.getNode(Opcode, DL, VTList: VTs, N1: Chain, N2: CmpOp0, N3: CmpOp1);
  }
  return DAG.getNode(Opcode, DL, VT, N1: CmpOp0, N2: CmpOp1);
}
3325 | |
// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
// an integer mask of type VT. If Chain is nonnull, we have a strict
// floating-point comparison. If in addition IsSignaling is true, we have
// a strict signaling floating-point comparison.
SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
                                                const SDLoc &DL, EVT VT,
                                                ISD::CondCode CC,
                                                SDValue CmpOp0,
                                                SDValue CmpOp1,
                                                SDValue Chain,
                                                bool IsSignaling) const {
  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
  assert (!Chain || IsFP);
  assert (!IsSignaling || Chain);
  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
                 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
  bool Invert = false;
  SDValue Cmp;
  switch (CC) {
    // Handle tests for order using (or (ogt y x) (oge x y)).
    // SETUO ("unordered") is lowered as the inverse of SETO.
  case ISD::SETUO:
    Invert = true;
    [[fallthrough]];
  case ISD::SETO: {
    assert(IsFP && "Unexpected integer comparison" );
    SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
                              DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
    SDValue GE = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGE, Mode),
                              DL, VT, CmpOp0, CmpOp1, Chain);
    Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GE);
    // Merge the chains of the two strict sub-comparisons.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          LT.getValue(1), GE.getValue(1));
    break;
  }

    // Handle <> tests using (or (ogt y x) (ogt x y)).
    // SETUEQ ("unordered or equal") is lowered as the inverse of SETONE.
  case ISD::SETUEQ:
    Invert = true;
    [[fallthrough]];
  case ISD::SETONE: {
    assert(IsFP && "Unexpected integer comparison" );
    SDValue LT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
                              DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
    SDValue GT = getVectorCmp(DAG, Opcode: getVectorComparison(CC: ISD::SETOGT, Mode),
                              DL, VT, CmpOp0, CmpOp1, Chain);
    Cmp = DAG.getNode(Opcode: ISD::OR, DL, VT, N1: LT, N2: GT);
    // Merge the chains of the two strict sub-comparisons.
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          LT.getValue(1), GT.getValue(1));
    break;
  }

    // Otherwise a single comparison is enough. It doesn't really
    // matter whether we try the inversion or the swap first, since
    // there are no cases where both work.
  default:
    if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
    else {
      // Try again with the operands swapped.
      CC = ISD::getSetCCSwappedOperands(Operation: CC);
      if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0: CmpOp1, CmpOp1: CmpOp0, Chain);
      else
        llvm_unreachable("Unhandled comparison" );
    }
    if (Chain)
      Chain = Cmp.getValue(R: 1);
    break;
  }
  // Invert the mask by XOR-ing it with all-ones if required.
  if (Invert) {
    SDValue Mask =
      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
    Cmp = DAG.getNode(Opcode: ISD::XOR, DL, VT, N1: Cmp, N2: Mask);
  }
  // For the strict forms, attach the (possibly merged) chain to the result.
  if (Chain && Chain.getNode() != Cmp.getNode()) {
    SDValue Ops[2] = { Cmp, Chain };
    Cmp = DAG.getMergeValues(Ops, dl: DL);
  }
  return Cmp;
}
3407 | |
3408 | SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, |
3409 | SelectionDAG &DAG) const { |
3410 | SDValue CmpOp0 = Op.getOperand(i: 0); |
3411 | SDValue CmpOp1 = Op.getOperand(i: 1); |
3412 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 2))->get(); |
3413 | SDLoc DL(Op); |
3414 | EVT VT = Op.getValueType(); |
3415 | if (VT.isVector()) |
3416 | return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); |
3417 | |
3418 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL)); |
3419 | SDValue CCReg = emitCmp(DAG, DL, C); |
3420 | return emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask); |
3421 | } |
3422 | |
SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
                                                  SelectionDAG &DAG,
                                                  bool IsSignaling) const {
  // Operands: input chain, LHS, RHS, condition code.
  SDValue Chain = Op.getOperand(i: 0);
  SDValue CmpOp0 = Op.getOperand(i: 1);
  SDValue CmpOp1 = Op.getOperand(i: 2);
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 3))->get();
  SDLoc DL(Op);
  EVT VT = Op.getNode()->getValueType(ResNo: 0);
  if (VT.isVector()) {
    SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
                                   Chain, IsSignaling);
    return Res.getValue(R: Op.getResNo());
  }

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL, Chain, IsSignaling));
  SDValue CCReg = emitCmp(DAG, DL, C);
  // Copy the node flags of the original operation onto the comparison.
  CCReg->setFlags(Op->getFlags());
  SDValue Result = emitSETCC(DAG, DL, CCReg, CCValid: C.CCValid, CCMask: C.CCMask);
  // Return both the boolean result and the comparison's output chain.
  SDValue Ops[2] = { Result, CCReg.getValue(R: 1) };
  return DAG.getMergeValues(Ops, dl: DL);
}
3445 | |
3446 | SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { |
3447 | ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 1))->get(); |
3448 | SDValue CmpOp0 = Op.getOperand(i: 2); |
3449 | SDValue CmpOp1 = Op.getOperand(i: 3); |
3450 | SDValue Dest = Op.getOperand(i: 4); |
3451 | SDLoc DL(Op); |
3452 | |
3453 | Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL)); |
3454 | SDValue CCReg = emitCmp(DAG, DL, C); |
3455 | return DAG.getNode( |
3456 | SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), |
3457 | DAG.getTargetConstant(C.CCValid, DL, MVT::i32), |
3458 | DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); |
3459 | } |
3460 | |
3461 | // Return true if Pos is CmpOp and Neg is the negative of CmpOp, |
3462 | // allowing Pos and Neg to be wider than CmpOp. |
3463 | static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { |
3464 | return (Neg.getOpcode() == ISD::SUB && |
3465 | Neg.getOperand(i: 0).getOpcode() == ISD::Constant && |
3466 | Neg.getConstantOperandVal(i: 0) == 0 && Neg.getOperand(i: 1) == Pos && |
3467 | (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && |
3468 | Pos.getOperand(i: 0) == CmpOp))); |
3469 | } |
3470 | |
3471 | // Return the absolute or negative absolute of Op; IsNegative decides which. |
3472 | static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, |
3473 | bool IsNegative) { |
3474 | Op = DAG.getNode(Opcode: ISD::ABS, DL, VT: Op.getValueType(), Operand: Op); |
3475 | if (IsNegative) |
3476 | Op = DAG.getNode(Opcode: ISD::SUB, DL, VT: Op.getValueType(), |
3477 | N1: DAG.getConstant(Val: 0, DL, VT: Op.getValueType()), N2: Op); |
3478 | return Op; |
3479 | } |
3480 | |
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
                                              SelectionDAG &DAG) const {
  // Operands: LHS, RHS, value if true, value if false, condition code.
  SDValue CmpOp0 = Op.getOperand(i: 0);
  SDValue CmpOp1 = Op.getOperand(i: 1);
  SDValue TrueOp = Op.getOperand(i: 2);
  SDValue FalseOp = Op.getOperand(i: 3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
  SDLoc DL(Op);

  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, Cond: CC, DL));

  // Check for absolute and negative-absolute selections, including those
  // where the comparison value is sign-extended (for LPGFR and LNGFR).
  // This check supplements the one in DAGCombiner.
  // The pattern requires an ordered integer comparison against zero.
  if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
      C.CCMask != SystemZ::CCMASK_CMP_NE &&
      C.Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Val&: C.Op1)->getValueSizeInBits(ResNo: 0) <= 64 &&
      C.Op1->getAsZExtVal() == 0) {
    if (isAbsolute(CmpOp: C.Op0, Pos: TrueOp, Neg: FalseOp))
      return getAbsolute(DAG, DL, Op: TrueOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_LT);
    if (isAbsolute(CmpOp: C.Op0, Pos: FalseOp, Neg: TrueOp))
      return getAbsolute(DAG, DL, Op: FalseOp, IsNegative: C.CCMask & SystemZ::CCMASK_CMP_GT);
  }

  // Otherwise select between TrueOp and FalseOp based on the CC result.
  SDValue CCReg = emitCmp(DAG, DL, C);
  SDValue Ops[] = {TrueOp, FalseOp,
                   DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};

  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
}
3513 | |
SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  int64_t Offset = Node->getOffset();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  CodeModel::Model CM = DAG.getTarget().getCodeModel();

  SDValue Result;
  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
    // The symbol is directly reachable with PC-relative addressing.
    if (isInt<32>(x: Offset)) {
      // Assign anchors at 1<<12 byte boundaries.
      uint64_t Anchor = Offset & ~uint64_t(0xfff);
      Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor);
      Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);

      // The offset can be folded into the address if it is aligned to a
      // halfword.
      Offset -= Anchor;
      if (Offset != 0 && (Offset & 1) == 0) {
        SDValue Full =
          DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: Anchor + Offset);
        Result = DAG.getNode(Opcode: SystemZISD::PCREL_OFFSET, DL, VT: PtrVT, N1: Full, N2: Result);
        Offset = 0;
      }
    } else {
      // Conservatively load a constant offset greater than 32 bits into a
      // register below.
      Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT);
      Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
    }
  } else if (Subtarget.isTargetELF()) {
    // ELF without PC-relative reach: load the address from the GOT.
    Result = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0, TargetFlags: SystemZII::MO_GOT);
    Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result);
    Result = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Result,
                         PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
  } else if (Subtarget.isTargetzOS()) {
    // z/OS resolves globals through the ADA.
    Result = getADAEntry(DAG, GV, DL, PtrVT);
  } else
    llvm_unreachable("Unexpected Subtarget" );

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result,
                         N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));

  return Result;
}
3563 | |
// Emit the call sequence used to retrieve a TLS offset via __tls_get_offset.
// The GOT offset of the TLS symbol is passed in %r2 and the GOT address in
// %r12; the result comes back in %r2. Opcode selects the call node
// (SystemZISD::TLS_GDCALL or SystemZISD::TLS_LDCALL).
SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
                                                 SelectionDAG &DAG,
                                                 unsigned Opcode,
                                                 SDValue GOTOffset) const {
  SDLoc DL(Node);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  SDValue Chain = DAG.getEntryNode();
  SDValue Glue;

  // The GHC convention reserves the registers this sequence needs, so
  // reject it up front.
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error(reason: "In GHC calling convention TLS is not supported" );

  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(VT: PtrVT);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
  Glue = Chain.getValue(R: 1);
  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
  Glue = Chain.getValue(R: 1);

  // The first call operand is the chain and the second is the TLS symbol.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Elt: Chain);
  Ops.push_back(Elt: DAG.getTargetGlobalAddress(GV: Node->getGlobal(), DL,
                                           VT: Node->getValueType(ResNo: 0),
                                           offset: 0, TargetFlags: 0));

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(MF: DAG.getMachineFunction(), CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention" );
  Ops.push_back(Elt: DAG.getRegisterMask(RegMask: Mask));

  // Glue the call to the argument copies.
  Ops.push_back(Elt: Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(Opcode, DL, VTList: NodeTys, Ops);
  Glue = Chain.getValue(R: 1);

  // Copy the return value from %r2.
  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
}
3614 | |
3615 | SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, |
3616 | SelectionDAG &DAG) const { |
3617 | SDValue Chain = DAG.getEntryNode(); |
3618 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3619 | |
3620 | // The high part of the thread pointer is in access register 0. |
3621 | SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); |
3622 | TPHi = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL, VT: PtrVT, Operand: TPHi); |
3623 | |
3624 | // The low part of the thread pointer is in access register 1. |
3625 | SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); |
3626 | TPLo = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL, VT: PtrVT, Operand: TPLo); |
3627 | |
3628 | // Merge them into a single 64-bit address. |
3629 | SDValue TPHiShifted = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: TPHi, |
3630 | N2: DAG.getConstant(Val: 32, DL, VT: PtrVT)); |
3631 | return DAG.getNode(Opcode: ISD::OR, DL, VT: PtrVT, N1: TPHiShifted, N2: TPLo); |
3632 | } |
3633 | |
// Lower a TLS global address. Whatever the TLS model, the final address is
// always the thread pointer plus a model-specific offset computed below.
SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
                                                     SelectionDAG &DAG) const {
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA: Node, DAG);
  SDLoc DL(Node);
  const GlobalValue *GV = Node->getGlobal();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);

  // The GHC convention reserves the registers needed by the sequences below.
  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error(reason: "In GHC calling convention TLS is not supported" );

  SDValue TP = lowerThreadPointer(DL, DAG);

  // Get the offset of GA from the thread pointer, based on the TLS model.
  SDValue Offset;
  switch (model) {
  case TLSModel::GeneralDynamic: {
    // Load the GOT offset of the tls_index (module ID / per-symbol offset).
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSGD);

    Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
    Offset = DAG.getLoad(
        VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
        PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the offset.
    Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_GDCALL, GOTOffset: Offset);
    break;
  }

  case TLSModel::LocalDynamic: {
    // Load the GOT offset of the module ID.
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::TLSLDM);

    Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
    Offset = DAG.getLoad(
        VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
        PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));

    // Call __tls_get_offset to retrieve the module base offset.
    Offset = lowerTLSGetOffset(Node, DAG, Opcode: SystemZISD::TLS_LDCALL, GOTOffset: Offset);

    // Note: The SystemZLDCleanupPass will remove redundant computations
    // of the module base offset. Count total number of local-dynamic
    // accesses to trigger execution of that pass.
    SystemZMachineFunctionInfo* MFI =
        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
    MFI->incNumLocalDynamicTLSAccesses();

    // Add the per-symbol offset.
    CPV = SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::DTPOFF);

    SDValue DTPOffset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
    DTPOffset = DAG.getLoad(
        VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: DTPOffset,
        PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));

    Offset = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Offset, N2: DTPOffset);
    break;
  }

  case TLSModel::InitialExec: {
    // Load the offset from the GOT.
    Offset = DAG.getTargetGlobalAddress(GV, DL, VT: PtrVT, offset: 0,
                                        TargetFlags: SystemZII::MO_INDNTPOFF);
    Offset = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Offset);
    Offset =
        DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
                    PtrInfo: MachinePointerInfo::getGOT(MF&: DAG.getMachineFunction()));
    break;
  }

  case TLSModel::LocalExec: {
    // Force the offset into the constant pool and load it from there.
    SystemZConstantPoolValue *CPV =
        SystemZConstantPoolValue::Create(GV, Modifier: SystemZCP::NTPOFF);

    Offset = DAG.getConstantPool(C: CPV, VT: PtrVT, Align: Align(8));
    Offset = DAG.getLoad(
        VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: Offset,
        PtrInfo: MachinePointerInfo::getConstantPool(MF&: DAG.getMachineFunction()));
    break;
  }
  }

  // Add the base and offset together.
  return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: TP, N2: Offset);
}
3726 | |
3727 | SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, |
3728 | SelectionDAG &DAG) const { |
3729 | SDLoc DL(Node); |
3730 | const BlockAddress *BA = Node->getBlockAddress(); |
3731 | int64_t Offset = Node->getOffset(); |
3732 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3733 | |
3734 | SDValue Result = DAG.getTargetBlockAddress(BA, VT: PtrVT, Offset); |
3735 | Result = DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3736 | return Result; |
3737 | } |
3738 | |
3739 | SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, |
3740 | SelectionDAG &DAG) const { |
3741 | SDLoc DL(JT); |
3742 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3743 | SDValue Result = DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: PtrVT); |
3744 | |
3745 | // Use LARL to load the address of the table. |
3746 | return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3747 | } |
3748 | |
3749 | SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, |
3750 | SelectionDAG &DAG) const { |
3751 | SDLoc DL(CP); |
3752 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3753 | |
3754 | SDValue Result; |
3755 | if (CP->isMachineConstantPoolEntry()) |
3756 | Result = |
3757 | DAG.getTargetConstantPool(C: CP->getMachineCPVal(), VT: PtrVT, Align: CP->getAlign()); |
3758 | else |
3759 | Result = DAG.getTargetConstantPool(C: CP->getConstVal(), VT: PtrVT, Align: CP->getAlign(), |
3760 | Offset: CP->getOffset()); |
3761 | |
3762 | // Use LARL to load the address of the constant pool entry. |
3763 | return DAG.getNode(Opcode: SystemZISD::PCREL_WRAPPER, DL, VT: PtrVT, Operand: Result); |
3764 | } |
3765 | |
// Lower ISD::FRAMEADDR. The frame address is defined here as the address of
// the back chain slot; a nonzero depth walks the back chain, which is only
// possible when the subtarget maintains one.
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
                                              SelectionDAG &DAG) const {
  auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(i: 0);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  // By definition, the frame address is the address of the back chain. (In
  // the case of packed stack without backchain, return the address where the
  // backchain would have been stored. This will either be an unused space or
  // contain a saved register).
  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
  SDValue BackChain = DAG.getFrameIndex(FI: BackChainIdx, VT: PtrVT);

  if (Depth > 0) {
    // FIXME The frontend should detect this case.
    if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
      report_fatal_error(reason: "Unsupported stack frame traversal count" );

    // Each iteration follows one back-chain link, then rebases the pointer
    // onto that frame's backchain slot.
    SDValue Offset = DAG.getConstant(Val: TFL->getBackchainOffset(MF), DL, VT: PtrVT);
    while (Depth--) {
      BackChain = DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr: BackChain,
                              PtrInfo: MachinePointerInfo());
      BackChain = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BackChain, N2: Offset);
    }
  }

  return BackChain;
}
3799 | |
// Lower ISD::RETURNADDR. Depth 0 reads the link register (R14D on ELF,
// R7D on XPLINK); deeper frames load the saved return address from the
// stack, reached via the frame address.
SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
                                               SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc DL(Op);
  unsigned Depth = Op.getConstantOperandVal(i: 0);
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  if (Depth > 0) {
    // FIXME The frontend should detect this case.
    if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
      report_fatal_error(reason: "Unsupported stack frame traversal count" );

    // Load the return address out of the caller's frame, at the ABI slot
    // given by getReturnAddressOffset.
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
    int Offset = TFL->getReturnAddressOffset(MF);
    SDValue Ptr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FrameAddr,
                              N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));
    return DAG.getLoad(VT: PtrVT, dl: DL, Chain: DAG.getEntryNode(), Ptr,
                       PtrInfo: MachinePointerInfo());
  }

  // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
  // implicit live-in.
  SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
  Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
                                  &SystemZ::GR64BitRegClass);
  return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL, Reg: LinkReg, VT: PtrVT);
}
3834 | |
// Lower a bitcast between i32 and f32. f32 values live in the high 32 bits
// of a 64-bit register, so the cast goes through an i64/f64 intermediate:
// either directly via the subreg_h32 subregister (with the high-word
// facility) or with an explicit shift by 32.
SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue In = Op.getOperand(i: 0);
  EVT InVT = In.getValueType();
  EVT ResVT = Op.getValueType();

  // Convert loads directly. This is normally done by DAGCombiner,
  // but we need this case for bitcasts that are created during lowering
  // and which are then lowered themselves.
  if (auto *LoadN = dyn_cast<LoadSDNode>(Val&: In))
    if (ISD::isNormalLoad(N: LoadN)) {
      SDValue NewLoad = DAG.getLoad(VT: ResVT, dl: DL, Chain: LoadN->getChain(),
                                    Ptr: LoadN->getBasePtr(), MMO: LoadN->getMemOperand());
      // Update the chain uses.
      DAG.ReplaceAllUsesOfValueWith(From: SDValue(LoadN, 1), To: NewLoad.getValue(R: 1));
      return NewLoad;
    }

  if (InVT == MVT::i32 && ResVT == MVT::f32) {
    SDValue In64;
    if (Subtarget.hasHighWord()) {
      // Insert the i32 straight into the high word of an undef i64.
      SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
                                       MVT::i64);
      In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                       MVT::i64, SDValue(U64, 0), In);
    } else {
      // No high-word facility: shift the value into the high 32 bits.
      In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
      In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
                         DAG.getConstant(32, DL, MVT::i64));
    }
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
                                      DL, MVT::f32, Out64);
  }
  if (InVT == MVT::f32 && ResVT == MVT::i32) {
    SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
                                             MVT::f64, SDValue(U64, 0), In);
    SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
    if (Subtarget.hasHighWord())
      return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
                                        MVT::i32, Out64);
    // No high-word facility: move the high 32 bits down and truncate.
    SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
                                DAG.getConstant(32, DL, MVT::i64));
    return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
  }
  llvm_unreachable("Unexpected bitcast combination" );
}
3884 | |
3885 | SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, |
3886 | SelectionDAG &DAG) const { |
3887 | |
3888 | if (Subtarget.isTargetXPLINK64()) |
3889 | return lowerVASTART_XPLINK(Op, DAG); |
3890 | else |
3891 | return lowerVASTART_ELF(Op, DAG); |
3892 | } |
3893 | |
3894 | SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, |
3895 | SelectionDAG &DAG) const { |
3896 | MachineFunction &MF = DAG.getMachineFunction(); |
3897 | SystemZMachineFunctionInfo *FuncInfo = |
3898 | MF.getInfo<SystemZMachineFunctionInfo>(); |
3899 | |
3900 | SDLoc DL(Op); |
3901 | |
3902 | // vastart just stores the address of the VarArgsFrameIndex slot into the |
3903 | // memory location argument. |
3904 | EVT PtrVT = getPointerTy(DL: DAG.getDataLayout()); |
3905 | SDValue FR = DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT); |
3906 | const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue(); |
3907 | return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: FR, Ptr: Op.getOperand(i: 1), |
3908 | PtrInfo: MachinePointerInfo(SV)); |
3909 | } |
3910 | |
// Lower ISD::VASTART for ELF by initializing the four 8-byte fields of the
// va_list object at the given address: the first unnamed GPR and FPR
// numbers, the overflow argument area, and the register save area.
SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
                                                SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  EVT PtrVT = getPointerTy(DL: DAG.getDataLayout());

  SDValue Chain = Op.getOperand(i: 0);
  SDValue Addr = Op.getOperand(i: 1);
  const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
  SDLoc DL(Op);

  // The initial values of each field.
  const unsigned NumFields = 4;
  SDValue Fields[NumFields] = {
    DAG.getConstant(Val: FuncInfo->getVarArgsFirstGPR(), DL, VT: PtrVT),
    DAG.getConstant(Val: FuncInfo->getVarArgsFirstFPR(), DL, VT: PtrVT),
    DAG.getFrameIndex(FI: FuncInfo->getVarArgsFrameIndex(), VT: PtrVT),
    DAG.getFrameIndex(FI: FuncInfo->getRegSaveFrameIndex(), VT: PtrVT)
  };

  // Store each field into its respective slot (consecutive 8-byte offsets
  // 0, 8, 16, 24 from Addr).
  SDValue MemOps[NumFields];
  unsigned Offset = 0;
  for (unsigned I = 0; I < NumFields; ++I) {
    SDValue FieldAddr = Addr;
    if (Offset != 0)
      FieldAddr = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FieldAddr,
                              N2: DAG.getIntPtrConstant(Val: Offset, DL));
    MemOps[I] = DAG.getStore(Chain, dl: DL, Val: Fields[I], Ptr: FieldAddr,
                             PtrInfo: MachinePointerInfo(SV, Offset));
    Offset += 8;
  }
  // Tie the four stores together so all of them are kept.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
3946 | |
3947 | SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, |
3948 | SelectionDAG &DAG) const { |
3949 | SDValue Chain = Op.getOperand(i: 0); |
3950 | SDValue DstPtr = Op.getOperand(i: 1); |
3951 | SDValue SrcPtr = Op.getOperand(i: 2); |
3952 | const Value *DstSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 3))->getValue(); |
3953 | const Value *SrcSV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 4))->getValue(); |
3954 | SDLoc DL(Op); |
3955 | |
3956 | uint32_t Sz = |
3957 | Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(AS: 0) : 32; |
3958 | return DAG.getMemcpy(Chain, dl: DL, Dst: DstPtr, Src: SrcPtr, Size: DAG.getIntPtrConstant(Val: Sz, DL), |
3959 | Alignment: Align(8), /*isVolatile*/ isVol: false, /*AlwaysInline*/ false, |
3960 | /*isTailCall*/ false, DstPtrInfo: MachinePointerInfo(DstSV), |
3961 | SrcPtrInfo: MachinePointerInfo(SrcSV)); |
3962 | } |
3963 | |
3964 | SDValue |
3965 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, |
3966 | SelectionDAG &DAG) const { |
3967 | if (Subtarget.isTargetXPLINK64()) |
3968 | return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); |
3969 | else |
3970 | return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); |
3971 | } |
3972 | |
3973 | SDValue |
3974 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, |
3975 | SelectionDAG &DAG) const { |
3976 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
3977 | MachineFunction &MF = DAG.getMachineFunction(); |
3978 | bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack" ); |
3979 | SDValue Chain = Op.getOperand(i: 0); |
3980 | SDValue Size = Op.getOperand(i: 1); |
3981 | SDValue Align = Op.getOperand(i: 2); |
3982 | SDLoc DL(Op); |
3983 | |
3984 | // If user has set the no alignment function attribute, ignore |
3985 | // alloca alignments. |
3986 | uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); |
3987 | |
3988 | uint64_t StackAlign = TFI->getStackAlignment(); |
3989 | uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign); |
3990 | uint64_t = RequiredAlign - StackAlign; |
3991 | |
3992 | SDValue NeededSpace = Size; |
3993 | |
3994 | // Add extra space for alignment if needed. |
3995 | EVT PtrVT = getPointerTy(DL: MF.getDataLayout()); |
3996 | if (ExtraAlignSpace) |
3997 | NeededSpace = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: NeededSpace, |
3998 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT)); |
3999 | |
4000 | bool IsSigned = false; |
4001 | bool DoesNotReturn = false; |
4002 | bool IsReturnValueUsed = false; |
4003 | EVT VT = Op.getValueType(); |
4004 | SDValue AllocaCall = |
4005 | makeExternalCall(Chain, DAG, CalleeName: "@@ALCAXP" , RetVT: VT, Ops: ArrayRef(NeededSpace), |
4006 | CallConv: CallingConv::C, IsSigned, DL, DoesNotReturn, |
4007 | IsReturnValueUsed) |
4008 | .first; |
4009 | |
4010 | // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue |
4011 | // to end of call in order to ensure it isn't broken up from the call |
4012 | // sequence. |
4013 | auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); |
4014 | Register SPReg = Regs.getStackPointerRegister(); |
4015 | Chain = AllocaCall.getValue(R: 1); |
4016 | SDValue Glue = AllocaCall.getValue(R: 2); |
4017 | SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, dl: DL, Reg: SPReg, VT: PtrVT, Glue); |
4018 | Chain = NewSPRegNode.getValue(R: 1); |
4019 | |
4020 | MVT PtrMVT = getPointerMemTy(DL: MF.getDataLayout()); |
4021 | SDValue ArgAdjust = DAG.getNode(Opcode: SystemZISD::ADJDYNALLOC, DL, VT: PtrMVT); |
4022 | SDValue Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrMVT, N1: NewSPRegNode, N2: ArgAdjust); |
4023 | |
4024 | // Dynamically realign if needed. |
4025 | if (ExtraAlignSpace) { |
4026 | Result = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: Result, |
4027 | N2: DAG.getConstant(Val: ExtraAlignSpace, DL, VT: PtrVT)); |
4028 | Result = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Result, |
4029 | N2: DAG.getConstant(Val: ~(RequiredAlign - 1), DL, VT: PtrVT)); |
4030 | } |
4031 | |
4032 | SDValue Ops[2] = {Result, Chain}; |
4033 | return DAG.getMergeValues(Ops, dl: DL); |
4034 | } |
4035 | |
4036 | SDValue |
4037 | SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, |
4038 | SelectionDAG &DAG) const { |
4039 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
4040 | MachineFunction &MF = DAG.getMachineFunction(); |
4041 | bool RealignOpt = !MF.getFunction().hasFnAttribute(Kind: "no-realign-stack" ); |
4042 | bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
4043 | |
4044 | SDValue Chain = Op.getOperand(i: 0); |
4045 | SDValue Size = Op.getOperand(i: 1); |
4046 | SDValue Align = Op.getOperand(i: 2); |
4047 | SDLoc DL(Op); |
4048 | |
4049 | // If user has set the no alignment function attribute, ignore |
4050 | // alloca alignments. |
4051 | uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); |
4052 | |
4053 | uint64_t StackAlign = TFI->getStackAlignment(); |
4054 | uint64_t RequiredAlign = std::max(a: AlignVal, b: StackAlign); |
4055 | uint64_t = RequiredAlign - StackAlign; |
4056 | |
4057 | Register SPReg = getStackPointerRegisterToSaveRestore(); |
4058 | SDValue NeededSpace = Size; |
4059 | |
4060 | // Get a reference to the stack pointer. |
4061 | SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); |
4062 | |
4063 | // If we need a backchain, save it now. |
4064 | SDValue Backchain; |
4065 | if (StoreBackchain) |
4066 | Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), |
4067 | MachinePointerInfo()); |
4068 | |
4069 | // Add extra space for alignment if needed. |
4070 | if (ExtraAlignSpace) |
4071 | NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, |
4072 | DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); |
4073 | |
4074 | // Get the new stack pointer value. |
4075 | SDValue NewSP; |
4076 | if (hasInlineStackProbe(MF)) { |
4077 | NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, |
4078 | DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); |
4079 | Chain = NewSP.getValue(R: 1); |
4080 | } |
4081 | else { |
4082 | NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); |
4083 | // Copy the new stack pointer back. |
4084 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: SPReg, N: NewSP); |
4085 | } |
4086 | |
4087 | // The allocated data lives above the 160 bytes allocated for the standard |
4088 | // frame, plus any outgoing stack arguments. We don't know how much that |
4089 | // amounts to yet, so emit a special ADJDYNALLOC placeholder. |
4090 | SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); |
4091 | SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); |
4092 | |
4093 | // Dynamically realign if needed. |
4094 | if (RequiredAlign > StackAlign) { |
4095 | Result = |
4096 | DAG.getNode(ISD::ADD, DL, MVT::i64, Result, |
4097 | DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); |
4098 | Result = |
4099 | DAG.getNode(ISD::AND, DL, MVT::i64, Result, |
4100 | DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); |
4101 | } |
4102 | |
4103 | if (StoreBackchain) |
4104 | Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG), |
4105 | PtrInfo: MachinePointerInfo()); |
4106 | |
4107 | SDValue Ops[2] = { Result, Chain }; |
4108 | return DAG.getMergeValues(Ops, dl: DL); |
4109 | } |
4110 | |
4111 | SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( |
4112 | SDValue Op, SelectionDAG &DAG) const { |
4113 | SDLoc DL(Op); |
4114 | |
4115 | return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); |
4116 | } |
4117 | |
// Lower ISD::SMUL_LOHI. 32-bit cases widen to a plain 64-bit multiply;
// 64-bit cases use SMUL_LOHI directly when available, otherwise a signed
// full product is reconstructed from UMUL_LOHI.
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue Ops[2];
  if (is32Bit(VT))
    // Just do a normal 64-bit multiplication and extract the results.
    // We define this so that it can be used for constant division.
    lowerMUL_LOHI32(DAG, DL, Extend: ISD::SIGN_EXTEND, Op0: Op.getOperand(i: 0),
                    Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]);
  else if (Subtarget.hasMiscellaneousExtensions2())
    // SystemZISD::SMUL_LOHI returns the low result in the odd register and
    // the high result in the even register. ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SMUL_LOHI,
                     Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]);
  else {
    // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
    //
    //   (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
    //
    // but using the fact that the upper halves are either all zeros
    // or all ones:
    //
    //   (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
    //
    // and grouping the right terms together since they are quicker than the
    // multiplication:
    //
    //   (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
    // LH/RH are 0 or -1 depending on the operand signs (arithmetic shift
    // by 63), which makes the AND terms the sign corrections above.
    SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
    SDValue LL = Op.getOperand(i: 0);
    SDValue RL = Op.getOperand(i: 1);
    SDValue LH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: LL, N2: C63);
    SDValue RH = DAG.getNode(Opcode: ISD::SRA, DL, VT, N1: RL, N2: C63);
    // SystemZISD::UMUL_LOHI returns the low result in the odd register and
    // the high result in the even register. ISD::SMUL_LOHI is defined to
    // return the low half first, so the results are in reverse order.
    lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI,
                     Op0: LL, Op1: RL, Even&: Ops[1], Odd&: Ops[0]);
    SDValue NegLLTimesRH = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LL, N2: RH);
    SDValue NegLHTimesRL = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: LH, N2: RL);
    SDValue NegSum = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: NegLLTimesRH, N2: NegLHTimesRL);
    Ops[1] = DAG.getNode(Opcode: ISD::SUB, DL, VT, N1: Ops[1], N2: NegSum);
  }
  return DAG.getMergeValues(Ops, dl: DL);
}
4165 | |
4166 | SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, |
4167 | SelectionDAG &DAG) const { |
4168 | EVT VT = Op.getValueType(); |
4169 | SDLoc DL(Op); |
4170 | SDValue Ops[2]; |
4171 | if (is32Bit(VT)) |
4172 | // Just do a normal 64-bit multiplication and extract the results. |
4173 | // We define this so that it can be used for constant division. |
4174 | lowerMUL_LOHI32(DAG, DL, Extend: ISD::ZERO_EXTEND, Op0: Op.getOperand(i: 0), |
4175 | Op1: Op.getOperand(i: 1), Hi&: Ops[1], Lo&: Ops[0]); |
4176 | else |
4177 | // SystemZISD::UMUL_LOHI returns the low result in the odd register and |
4178 | // the high result in the even register. ISD::UMUL_LOHI is defined to |
4179 | // return the low half first, so the results are in reverse order. |
4180 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UMUL_LOHI, |
4181 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]); |
4182 | return DAG.getMergeValues(Ops, dl: DL); |
4183 | } |
4184 | |
// Lower ISD::SDIVREM using the combined divide instructions, which produce
// both quotient and remainder in an even/odd register pair.
SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Op0 = Op.getOperand(i: 0);
  SDValue Op1 = Op.getOperand(i: 1);
  EVT VT = Op.getValueType();
  SDLoc DL(Op);

  // We use DSGF for 32-bit division. This means the first operand must
  // always be 64-bit, and the second operand should be 32-bit whenever
  // that is possible, to improve performance.
  if (is32Bit(VT))
    Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
  else if (DAG.ComputeNumSignBits(Op1) > 32)
    // The divisor fits in 32 bits, so a truncated operand lets DSGF be used.
    Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);

  // DSG(F) returns the remainder in the even register and the
  // quotient in the odd register.
  SDValue Ops[2];
  lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::SDIVREM, Op0, Op1, Even&: Ops[1], Odd&: Ops[0]);
  return DAG.getMergeValues(Ops, dl: DL);
}
4206 | |
4207 | SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, |
4208 | SelectionDAG &DAG) const { |
4209 | EVT VT = Op.getValueType(); |
4210 | SDLoc DL(Op); |
4211 | |
4212 | // DL(G) returns the remainder in the even register and the |
4213 | // quotient in the odd register. |
4214 | SDValue Ops[2]; |
4215 | lowerGR128Binary(DAG, DL, VT, Opcode: SystemZISD::UDIVREM, |
4216 | Op0: Op.getOperand(i: 0), Op1: Op.getOperand(i: 1), Even&: Ops[1], Odd&: Ops[0]); |
4217 | return DAG.getMergeValues(Ops, dl: DL); |
4218 | } |
4219 | |
// Lower an i64 OR in which one operand is known to supply only the high
// 32 bits and the other only the low 32 bits: the OR can then be done as a
// 32-bit subregister insertion, which folds with GR32 operations.
SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation" );

  // Get the known-zero masks for each operand.
  SDValue Ops[] = {Op.getOperand(i: 0), Op.getOperand(i: 1)};
  KnownBits Known[2] = {DAG.computeKnownBits(Op: Ops[0]),
                        DAG.computeKnownBits(Op: Ops[1])};

  // See if the upper 32 bits of one operand and the lower 32 bits of the
  // other are known zero. They are the low and high operands respectively.
  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
                       Known[1].Zero.getZExtValue() };
  unsigned High, Low;
  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
    High = 1, Low = 0;
  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
    High = 0, Low = 1;
  else
    return Op;

  SDValue LowOp = Ops[Low];
  SDValue HighOp = Ops[High];

  // If the high part is a constant, we're better off using IILH.
  if (HighOp.getOpcode() == ISD::Constant)
    return Op;

  // If the low part is a constant that is outside the range of LHI,
  // then we're better off using IILF.
  if (LowOp.getOpcode() == ISD::Constant) {
    int64_t Value = int32_t(LowOp->getAsZExtVal());
    if (!isInt<16>(x: Value))
      return Op;
  }

  // Check whether the high part is an AND that doesn't change the
  // high 32 bits and just masks out low bits. We can skip it if so.
  if (HighOp.getOpcode() == ISD::AND &&
      HighOp.getOperand(i: 1).getOpcode() == ISD::Constant) {
    SDValue HighOp0 = HighOp.getOperand(i: 0);
    uint64_t Mask = HighOp.getConstantOperandVal(i: 1);
    if (DAG.MaskedValueIsZero(Op: HighOp0, Mask: APInt(64, ~(Mask | 0xffffffff))))
      HighOp = HighOp0;
  }

  // Take advantage of the fact that all GR32 operations only change the
  // low 32 bits by truncating Low to an i32 and inserting it directly
  // using a subreg. The interesting cases are those where the truncation
  // can be folded.
  SDLoc DL(Op);
  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
                                   MVT::i64, HighOp, Low32);
}
4274 | |
// Lower SADDO/SSUBO/UADDO/USUBO nodes.
// Produces a MERGE_VALUES of (arithmetic result, overflow/carry flag),
// matching the two results of the original node.
SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  SDLoc DL(N);

  // i128 is handled with vector-register operations: one node computes the
  // value, a companion node (VACC/VSCBI) computes the carry/borrow.
  if (N->getValueType(0) == MVT::i128) {
    unsigned BaseOp = 0;
    unsigned FlagOp = 0;
    bool IsBorrow = false;
    switch (Op.getOpcode()) {
    default: llvm_unreachable("Unknown instruction!" );
    case ISD::UADDO:
      BaseOp = ISD::ADD;
      FlagOp = SystemZISD::VACC;
      break;
    case ISD::USUBO:
      BaseOp = ISD::SUB;
      FlagOp = SystemZISD::VSCBI;
      IsBorrow = true;
      break;
    }
    SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
    SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
    // The flag node only ever produces 0 or 1; record that so the
    // zext/trunc below can be folded.
    Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
                       DAG.getValueType(MVT::i1));
    Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1));
    // The machine borrow indication is the inverse of USUBO's expected
    // flag, so flip the low bit for subtractions.
    if (IsBorrow)
      Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(),
                         N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType()));
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag);
  }

  // Scalar case: use a CC-producing SystemZISD node and extract the
  // overflow/carry bit from the condition code.
  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!" );
  case ISD::SADDO:
    BaseOp = SystemZISD::SADDO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::SSUBO:
    BaseOp = SystemZISD::SSUBO;
    CCValid = SystemZ::CCMASK_ARITH;
    CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
    break;
  case ISD::UADDO:
    BaseOp = SystemZISD::UADDO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO:
    BaseOp = SystemZISD::USUBO;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS);

  // emitSETCC materializes an i32; narrow it if the node expects i1.
  SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC);
}
4347 | |
4348 | static bool isAddCarryChain(SDValue Carry) { |
4349 | while (Carry.getOpcode() == ISD::UADDO_CARRY) |
4350 | Carry = Carry.getOperand(i: 2); |
4351 | return Carry.getOpcode() == ISD::UADDO; |
4352 | } |
4353 | |
4354 | static bool isSubBorrowChain(SDValue Carry) { |
4355 | while (Carry.getOpcode() == ISD::USUBO_CARRY) |
4356 | Carry = Carry.getOperand(i: 2); |
4357 | return Carry.getOpcode() == ISD::USUBO; |
4358 | } |
4359 | |
// Lower UADDO_CARRY/USUBO_CARRY nodes.
// Produces a MERGE_VALUES of (result, carry-out/borrow-out), mirroring the
// two results of the original node, or an empty SDValue to fall back to
// default expansion.
SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
                                                   SelectionDAG &DAG) const {

  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(ResNo: 0);

  // Let legalize expand this if it isn't a legal type yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDValue LHS = N->getOperand(Num: 0);
  SDValue RHS = N->getOperand(Num: 1);
  SDValue Carry = Op.getOperand(i: 2);
  SDLoc DL(N);

  // i128 is handled with vector-register carry operations (VAC/VSBI for the
  // value, VACCC/VSBCBI for the carry-out).
  if (VT == MVT::i128) {
    unsigned BaseOp = 0;
    unsigned FlagOp = 0;
    bool IsBorrow = false;
    switch (Op.getOpcode()) {
    default: llvm_unreachable("Unknown instruction!" );
    case ISD::UADDO_CARRY:
      BaseOp = SystemZISD::VAC;
      FlagOp = SystemZISD::VACCC;
      break;
    case ISD::USUBO_CARRY:
      BaseOp = SystemZISD::VSBI;
      FlagOp = SystemZISD::VSBCBI;
      IsBorrow = true;
      break;
    }
    // The machine borrow-in is the inverse of USUBO_CARRY's incoming
    // borrow, so flip the low bit before feeding it in.
    if (IsBorrow)
      Carry = DAG.getNode(Opcode: ISD::XOR, DL, VT: Carry.getValueType(),
                          N1: Carry, N2: DAG.getConstant(Val: 1, DL, VT: Carry.getValueType()));
    Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
    SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
    SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
    // The flag node only ever produces 0 or 1; record that so the
    // zext/trunc below can be folded.
    Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
                       DAG.getValueType(MVT::i1));
    Flag = DAG.getZExtOrTrunc(Op: Flag, DL, VT: N->getValueType(ResNo: 1));
    // Likewise invert the outgoing borrow indication.
    if (IsBorrow)
      Flag = DAG.getNode(Opcode: ISD::XOR, DL, VT: Flag.getValueType(),
                         N1: Flag, N2: DAG.getConstant(Val: 1, DL, VT: Flag.getValueType()));
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: Flag);
  }

  unsigned BaseOp = 0;
  unsigned CCValid = 0;
  unsigned CCMask = 0;

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown instruction!" );
  case ISD::UADDO_CARRY:
    // Only profitable when the whole carry chain starts at a UADDO;
    // otherwise fall back to default expansion.
    if (!isAddCarryChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::ADDCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
    break;
  case ISD::USUBO_CARRY:
    // Same restriction for borrow chains rooted at a USUBO.
    if (!isSubBorrowChain(Carry))
      return SDValue();

    BaseOp = SystemZISD::SUBCARRY;
    CCValid = SystemZ::CCMASK_LOGICAL;
    CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
    break;
  }

  // Set the condition code from the carry flag.
  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
                      DAG.getConstant(CCValid, DL, MVT::i32),
                      DAG.getConstant(CCMask, DL, MVT::i32));

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDValue Result = DAG.getNode(Opcode: BaseOp, DL, VTList: VTs, N1: LHS, N2: RHS, N3: Carry);

  // emitSETCC materializes an i32; narrow it if the node expects i1.
  SDValue SetCC = emitSETCC(DAG, DL, CCReg: Result.getValue(R: 1), CCValid, CCMask);
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL, VTList: N->getVTList(), N1: Result, N2: SetCC);
}
4445 | |
// Lower CTPOP for scalar and vector types.  Vector types use VPOPCT plus
// shifts/sums to widen the per-byte counts; scalars use POPCNT followed by
// a binary-tree reduction of the per-byte counts.
SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
                                          SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  Op = Op.getOperand(i: 0);

  // i128/v1i128: count each doubleword, then sum the two counts with VSUM.
  if (VT.getScalarSizeInBits() == 128) {
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
    Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
    SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
                                          DAG.getConstant(0, DL, MVT::i64));
    Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
    return Op;
  }

  // Handle vector types via VPOPCT.
  if (VT.isVector()) {
    // VPOPCT counts bits per byte; combine the byte counts to match the
    // element size of VT.
    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
    switch (VT.getScalarSizeInBits()) {
    case 8:
      // Byte elements: the per-byte counts are already the answer.
      break;
    case 16: {
      // Add the two byte counts within each halfword, then shift the sum
      // down into the low byte of the halfword.
      Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
      SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
      SDValue Tmp = DAG.getNode(Opcode: SystemZISD::VSHL_BY_SCALAR, DL, VT, N1: Op, N2: Shift);
      Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp);
      Op = DAG.getNode(Opcode: SystemZISD::VSRL_BY_SCALAR, DL, VT, N1: Op, N2: Shift);
      break;
    }
    case 32: {
      // Sum the four byte counts of each word with VSUM.
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
      break;
    }
    case 64: {
      // Two VSUM steps: bytes -> words, then words -> doublewords.
      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(Opcode: SystemZISD::VSUM, DL, VT, N1: Op, N2: Tmp);
      break;
    }
    default:
      llvm_unreachable("Unexpected type" );
    }
    return Op;
  }

  // Get the known-zero mask for the operand.
  KnownBits Known = DAG.computeKnownBits(Op);
  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
  if (NumSignificantBits == 0)
    // All bits known zero: the population count is trivially 0.
    return DAG.getConstant(Val: 0, DL, VT);

  // Skip known-zero high parts of the operand.
  int64_t OrigBitSize = VT.getSizeInBits();
  int64_t BitSize = llvm::bit_ceil(Value: NumSignificantBits);
  BitSize = std::min(a: BitSize, b: OrigBitSize);

  // The POPCNT instruction counts the number of bits in each byte.
  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
  Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op);

  // Add up per-byte counts in a binary tree. All bits of Op at
  // position larger than BitSize remain zero throughout.
  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
    SDValue Tmp = DAG.getNode(Opcode: ISD::SHL, DL, VT, N1: Op, N2: DAG.getConstant(Val: I, DL, VT));
    // When operating on a narrowed width, mask off bits shifted above it.
    if (BitSize != OrigBitSize)
      Tmp = DAG.getNode(Opcode: ISD::AND, DL, VT, N1: Tmp,
                        N2: DAG.getConstant(Val: ((uint64_t)1 << BitSize) - 1, DL, VT));
    Op = DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: Op, N2: Tmp);
  }

  // Extract overall result from high byte.
  if (BitSize > 8)
    Op = DAG.getNode(Opcode: ISD::SRL, DL, VT, N1: Op,
                     N2: DAG.getConstant(Val: BitSize - 8, DL, VT));

  return Op;
}
4528 | |
4529 | SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, |
4530 | SelectionDAG &DAG) const { |
4531 | SDLoc DL(Op); |
4532 | AtomicOrdering FenceOrdering = |
4533 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(i: 1)); |
4534 | SyncScope::ID FenceSSID = |
4535 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2)); |
4536 | |
4537 | // The only fence that needs an instruction is a sequentially-consistent |
4538 | // cross-thread fence. |
4539 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
4540 | FenceSSID == SyncScope::System) { |
4541 | return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, |
4542 | Op.getOperand(0)), |
4543 | 0); |
4544 | } |
4545 | |
4546 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
4547 | return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); |
4548 | } |
4549 | |
4550 | SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, |
4551 | SelectionDAG &DAG) const { |
4552 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
4553 | assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128." ); |
4554 | // Use same code to handle both legal and non-legal i128 types. |
4555 | SmallVector<SDValue, 2> Results; |
4556 | LowerOperationWrapper(N: Node, Results, DAG); |
4557 | return DAG.getMergeValues(Ops: Results, dl: SDLoc(Op)); |
4558 | } |
4559 | |
4560 | // Prepare for a Compare And Swap for a subword operation. This needs to be |
4561 | // done in memory with 4 bytes at natural alignment. |
4562 | static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, |
4563 | SDValue &AlignedAddr, SDValue &BitShift, |
4564 | SDValue &NegBitShift) { |
4565 | EVT PtrVT = Addr.getValueType(); |
4566 | EVT WideVT = MVT::i32; |
4567 | |
4568 | // Get the address of the containing word. |
4569 | AlignedAddr = DAG.getNode(Opcode: ISD::AND, DL, VT: PtrVT, N1: Addr, |
4570 | N2: DAG.getConstant(Val: -4, DL, VT: PtrVT)); |
4571 | |
4572 | // Get the number of bits that the word must be rotated left in order |
4573 | // to bring the field to the top bits of a GR32. |
4574 | BitShift = DAG.getNode(Opcode: ISD::SHL, DL, VT: PtrVT, N1: Addr, |
4575 | N2: DAG.getConstant(Val: 3, DL, VT: PtrVT)); |
4576 | BitShift = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: WideVT, Operand: BitShift); |
4577 | |
4578 | // Get the complementing shift amount, for rotating a field in the top |
4579 | // bits back to its proper position. |
4580 | NegBitShift = DAG.getNode(Opcode: ISD::SUB, DL, VT: WideVT, |
4581 | N1: DAG.getConstant(Val: 0, DL, VT: WideVT), N2: BitShift); |
4582 | |
4583 | } |
4584 | |
// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
// Returns a MERGE_VALUES of (loaded value, chain) for the subword case.
SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   unsigned Opcode) const {
  auto *Node = cast<AtomicSDNode>(Val: Op.getNode());

  // 32-bit operations need no special handling.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = MVT::i32;
  if (NarrowVT == WideVT)
    return Op;

  int64_t BitSize = NarrowVT.getSizeInBits();
  SDValue ChainIn = Node->getChain();
  SDValue Addr = Node->getBasePtr();
  SDValue Src2 = Node->getVal();
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  // Convert atomic subtracts of constants into additions.
  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
    if (auto *Const = dyn_cast<ConstantSDNode>(Val&: Src2)) {
      Opcode = SystemZISD::ATOMIC_LOADW_ADD;
      Src2 = DAG.getConstant(Val: -Const->getSExtValue(), DL, VT: Src2.getValueType());
    }

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Extend the source operand to 32 bits and prepare it for the inner loop.
  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
  // operations require the source to be shifted in advance. (This shift
  // can be folded if the source is constant.) For AND and NAND, the lower
  // bits must be set, while for other opcodes they should be left clear.
  if (Opcode != SystemZISD::ATOMIC_SWAPW)
    Src2 = DAG.getNode(Opcode: ISD::SHL, DL, VT: WideVT, N1: Src2,
                       N2: DAG.getConstant(Val: 32 - BitSize, DL, VT: WideVT))#
  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
      Opcode == SystemZISD::ATOMIC_LOADW_NAND)
    Src2 = DAG.getNode(Opcode: ISD::OR, DL, VT: WideVT, N1: Src2,
                       N2: DAG.getConstant(Val: uint32_t(-1) >> BitSize, DL, VT: WideVT));

  // Construct the ATOMIC_LOADW_* node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
                    DAG.getConstant(Val: BitSize, DL, VT: WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, dl: DL, VTList, Ops,
                                             MemVT: NarrowVT, MMO);

  // Rotate the result of the final CS so that the field is in the lower
  // bits of a GR32, then truncate it.
  SDValue ResultShift = DAG.getNode(Opcode: ISD::ADD, DL, VT: WideVT, N1: BitShift,
                                    N2: DAG.getConstant(Val: BitSize, DL, VT: WideVT));
  SDValue Result = DAG.getNode(Opcode: ISD::ROTL, DL, VT: WideVT, N1: AtomicOp, N2: ResultShift);

  SDValue RetOps[2] = { Result, AtomicOp.getValue(R: 1) };
  return DAG.getMergeValues(Ops: RetOps, dl: DL);
}
4644 | |
4645 | // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into |
4646 | // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. |
4647 | SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, |
4648 | SelectionDAG &DAG) const { |
4649 | auto *Node = cast<AtomicSDNode>(Val: Op.getNode()); |
4650 | EVT MemVT = Node->getMemoryVT(); |
4651 | if (MemVT == MVT::i32 || MemVT == MVT::i64) { |
4652 | // A full-width operation: negate and use LAA(G). |
4653 | assert(Op.getValueType() == MemVT && "Mismatched VTs" ); |
4654 | assert(Subtarget.hasInterlockedAccess1() && |
4655 | "Should have been expanded by AtomicExpand pass." ); |
4656 | SDValue Src2 = Node->getVal(); |
4657 | SDLoc DL(Src2); |
4658 | SDValue NegSrc2 = |
4659 | DAG.getNode(Opcode: ISD::SUB, DL, VT: MemVT, N1: DAG.getConstant(Val: 0, DL, VT: MemVT), N2: Src2); |
4660 | return DAG.getAtomic(Opcode: ISD::ATOMIC_LOAD_ADD, dl: DL, MemVT, |
4661 | Chain: Node->getChain(), Ptr: Node->getBasePtr(), Val: NegSrc2, |
4662 | MMO: Node->getMemOperand()); |
4663 | } |
4664 | |
4665 | return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_SUB); |
4666 | } |
4667 | |
// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
// Replaces all three results (original value, success flag, chain) via
// RAUW and returns an empty SDValue, except in the i128 path which returns
// a MERGE_VALUES directly.
SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
                                                    SelectionDAG &DAG) const {
  auto *Node = cast<AtomicSDNode>(Val: Op.getNode());
  SDValue ChainIn = Node->getOperand(Num: 0);
  SDValue Addr = Node->getOperand(Num: 1);
  SDValue CmpVal = Node->getOperand(Num: 2);
  SDValue SwapVal = Node->getOperand(Num: 3);
  MachineMemOperand *MMO = Node->getMemOperand();
  SDLoc DL(Node);

  if (Node->getMemoryVT() == MVT::i128) {
    // Use same code to handle both legal and non-legal i128 types.
    SmallVector<SDValue, 3> Results;
    LowerOperationWrapper(N: Node, Results, DAG);
    return DAG.getMergeValues(Ops: Results, dl: DL);
  }

  // We have native support for 32-bit and 64-bit compare and swap, but we
  // still need to expand extracting the "success" result from the CC.
  EVT NarrowVT = Node->getMemoryVT();
  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
  if (NarrowVT == WideVT) {
    SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
    SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
    SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAP,
                                               dl: DL, VTList: Tys, Ops, MemVT: NarrowVT, MMO);
    SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1),
                                CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ);

    DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: AtomicOp.getValue(R: 0));
    DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success);
    DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2));
    return SDValue();
  }

  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
  // via a fullword ATOMIC_CMP_SWAPW operation.
  int64_t BitSize = NarrowVT.getSizeInBits();

  SDValue AlignedAddr, BitShift, NegBitShift;
  getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);

  // Construct the ATOMIC_CMP_SWAPW node.
  SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
  SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
                    NegBitShift, DAG.getConstant(Val: BitSize, DL, VT: WideVT) };
  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode: SystemZISD::ATOMIC_CMP_SWAPW, dl: DL,
                                             VTList, Ops, MemVT: NarrowVT, MMO);
  SDValue Success = emitSETCC(DAG, DL, CCReg: AtomicOp.getValue(R: 1),
                              CCValid: SystemZ::CCMASK_ICMP, CCMask: SystemZ::CCMASK_CMP_EQ);

  // emitAtomicCmpSwapW() will zero extend the result (original value).
  SDValue OrigVal = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: WideVT, N1: AtomicOp.getValue(R: 0),
                                N2: DAG.getValueType(NarrowVT));
  DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 0), To: OrigVal);
  DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 1), To: Success);
  DAG.ReplaceAllUsesOfValueWith(From: Op.getValue(R: 2), To: AtomicOp.getValue(R: 2));
  return SDValue();
}
4728 | |
4729 | MachineMemOperand::Flags |
4730 | SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { |
4731 | // Because of how we convert atomic_load and atomic_store to normal loads and |
4732 | // stores in the DAG, we need to ensure that the MMOs are marked volatile |
4733 | // since DAGCombine hasn't been updated to account for atomic, but non |
4734 | // volatile loads. (See D57601) |
4735 | if (auto *SI = dyn_cast<StoreInst>(Val: &I)) |
4736 | if (SI->isAtomic()) |
4737 | return MachineMemOperand::MOVolatile; |
4738 | if (auto *LI = dyn_cast<LoadInst>(Val: &I)) |
4739 | if (LI->isAtomic()) |
4740 | return MachineMemOperand::MOVolatile; |
4741 | if (auto *AI = dyn_cast<AtomicRMWInst>(Val: &I)) |
4742 | if (AI->isAtomic()) |
4743 | return MachineMemOperand::MOVolatile; |
4744 | if (auto *AI = dyn_cast<AtomicCmpXchgInst>(Val: &I)) |
4745 | if (AI->isAtomic()) |
4746 | return MachineMemOperand::MOVolatile; |
4747 | return MachineMemOperand::MONone; |
4748 | } |
4749 | |
4750 | SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, |
4751 | SelectionDAG &DAG) const { |
4752 | MachineFunction &MF = DAG.getMachineFunction(); |
4753 | auto *Regs = Subtarget.getSpecialRegisters(); |
4754 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) |
4755 | report_fatal_error(reason: "Variable-sized stack allocations are not supported " |
4756 | "in GHC calling convention" ); |
4757 | return DAG.getCopyFromReg(Chain: Op.getOperand(i: 0), dl: SDLoc(Op), |
4758 | Reg: Regs->getStackPointerRegister(), VT: Op.getValueType()); |
4759 | } |
4760 | |
4761 | SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, |
4762 | SelectionDAG &DAG) const { |
4763 | MachineFunction &MF = DAG.getMachineFunction(); |
4764 | auto *Regs = Subtarget.getSpecialRegisters(); |
4765 | bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); |
4766 | |
4767 | if (MF.getFunction().getCallingConv() == CallingConv::GHC) |
4768 | report_fatal_error(reason: "Variable-sized stack allocations are not supported " |
4769 | "in GHC calling convention" ); |
4770 | |
4771 | SDValue Chain = Op.getOperand(i: 0); |
4772 | SDValue NewSP = Op.getOperand(i: 1); |
4773 | SDValue Backchain; |
4774 | SDLoc DL(Op); |
4775 | |
4776 | if (StoreBackchain) { |
4777 | SDValue OldSP = DAG.getCopyFromReg( |
4778 | Chain, DL, Regs->getStackPointerRegister(), MVT::i64); |
4779 | Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), |
4780 | MachinePointerInfo()); |
4781 | } |
4782 | |
4783 | Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: Regs->getStackPointerRegister(), N: NewSP); |
4784 | |
4785 | if (StoreBackchain) |
4786 | Chain = DAG.getStore(Chain, dl: DL, Val: Backchain, Ptr: getBackchainAddress(SP: NewSP, DAG), |
4787 | PtrInfo: MachinePointerInfo()); |
4788 | |
4789 | return Chain; |
4790 | } |
4791 | |
4792 | SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, |
4793 | SelectionDAG &DAG) const { |
4794 | bool IsData = Op.getConstantOperandVal(i: 4); |
4795 | if (!IsData) |
4796 | // Just preserve the chain. |
4797 | return Op.getOperand(i: 0); |
4798 | |
4799 | SDLoc DL(Op); |
4800 | bool IsWrite = Op.getConstantOperandVal(i: 2); |
4801 | unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; |
4802 | auto *Node = cast<MemIntrinsicSDNode>(Val: Op.getNode()); |
4803 | SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), |
4804 | Op.getOperand(1)}; |
4805 | return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, |
4806 | Node->getVTList(), Ops, |
4807 | Node->getMemoryVT(), Node->getMemOperand()); |
4808 | } |
4809 | |
4810 | // Convert condition code in CCReg to an i32 value. |
4811 | static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { |
4812 | SDLoc DL(CCReg); |
4813 | SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); |
4814 | return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, |
4815 | DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); |
4816 | } |
4817 | |
4818 | SDValue |
4819 | SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, |
4820 | SelectionDAG &DAG) const { |
4821 | unsigned Opcode, CCValid; |
4822 | if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { |
4823 | assert(Op->getNumValues() == 2 && "Expected only CC result and chain" ); |
4824 | SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); |
4825 | SDValue CC = getCCResult(DAG, CCReg: SDValue(Node, 0)); |
4826 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(Op.getNode(), 0), To: CC); |
4827 | return SDValue(); |
4828 | } |
4829 | |
4830 | return SDValue(); |
4831 | } |
4832 | |
// Lower an INTRINSIC_WO_CHAIN: CC-producing intrinsics are emitted via
// emitIntrinsicWithCC, the rest map directly to ISD or SystemZISD nodes.
// Returns an empty SDValue for intrinsics with no custom lowering here.
SDValue
SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
                                               SelectionDAG &DAG) const {
  unsigned Opcode, CCValid;
  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
    SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
    if (Op->getNumValues() == 1)
      // Only the CC result is used; convert it to an i32.
      return getCCResult(DAG, CCReg: SDValue(Node, 0));
    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result" );
    return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op), VTList: Op->getVTList(),
                       N1: SDValue(Node, 0), N2: getCCResult(DAG, CCReg: SDValue(Node, 1)));
  }

  unsigned Id = Op.getConstantOperandVal(i: 0);
  switch (Id) {
  case Intrinsic::thread_pointer:
    return lowerThreadPointer(DL: SDLoc(Op), DAG);

  // Vector permutes.
  case Intrinsic::s390_vpdi:
    return DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));

  case Intrinsic::s390_vperm:
    return DAG.getNode(Opcode: SystemZISD::PERMUTE, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));

  // Vector unpacks (sign/zero extension of the high or low half).
  case Intrinsic::s390_vuphb:
  case Intrinsic::s390_vuphh:
  case Intrinsic::s390_vuphf:
    return DAG.getNode(Opcode: SystemZISD::UNPACK_HIGH, DL: SDLoc(Op), VT: Op.getValueType(),
                       Operand: Op.getOperand(i: 1));

  case Intrinsic::s390_vuplhb:
  case Intrinsic::s390_vuplhh:
  case Intrinsic::s390_vuplhf:
    return DAG.getNode(Opcode: SystemZISD::UNPACKL_HIGH, DL: SDLoc(Op), VT: Op.getValueType(),
                       Operand: Op.getOperand(i: 1));

  case Intrinsic::s390_vuplb:
  case Intrinsic::s390_vuplhw:
  case Intrinsic::s390_vuplf:
    return DAG.getNode(Opcode: SystemZISD::UNPACK_LOW, DL: SDLoc(Op), VT: Op.getValueType(),
                       Operand: Op.getOperand(i: 1));

  case Intrinsic::s390_vupllb:
  case Intrinsic::s390_vupllh:
  case Intrinsic::s390_vupllf:
    return DAG.getNode(Opcode: SystemZISD::UNPACKL_LOW, DL: SDLoc(Op), VT: Op.getValueType(),
                       Operand: Op.getOperand(i: 1));

  // Vector sums.
  case Intrinsic::s390_vsumb:
  case Intrinsic::s390_vsumh:
  case Intrinsic::s390_vsumgh:
  case Intrinsic::s390_vsumgf:
  case Intrinsic::s390_vsumqf:
  case Intrinsic::s390_vsumqg:
    return DAG.getNode(Opcode: SystemZISD::VSUM, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));

  // 128-bit additions and their carry variants.
  case Intrinsic::s390_vaq:
    return DAG.getNode(Opcode: ISD::ADD, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
  case Intrinsic::s390_vaccb:
  case Intrinsic::s390_vacch:
  case Intrinsic::s390_vaccf:
  case Intrinsic::s390_vaccg:
  case Intrinsic::s390_vaccq:
    return DAG.getNode(Opcode: SystemZISD::VACC, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
  case Intrinsic::s390_vacq:
    return DAG.getNode(Opcode: SystemZISD::VAC, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
  case Intrinsic::s390_vacccq:
    return DAG.getNode(Opcode: SystemZISD::VACCC, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));

  // 128-bit subtractions and their borrow variants.
  case Intrinsic::s390_vsq:
    return DAG.getNode(Opcode: ISD::SUB, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
  case Intrinsic::s390_vscbib:
  case Intrinsic::s390_vscbih:
  case Intrinsic::s390_vscbif:
  case Intrinsic::s390_vscbig:
  case Intrinsic::s390_vscbiq:
    return DAG.getNode(Opcode: SystemZISD::VSCBI, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2));
  case Intrinsic::s390_vsbiq:
    return DAG.getNode(Opcode: SystemZISD::VSBI, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
  case Intrinsic::s390_vsbcbiq:
    return DAG.getNode(Opcode: SystemZISD::VSBCBI, DL: SDLoc(Op), VT: Op.getValueType(),
                       N1: Op.getOperand(i: 1), N2: Op.getOperand(i: 2), N3: Op.getOperand(i: 3));
  }

  return SDValue();
}
4929 | |
namespace {
// Says that SystemZISD operation Opcode can be used to perform the equivalent
// of a VPERM with permute vector Bytes. If Opcode takes three operands,
// Operand is the constant third operand, otherwise it is the number of
// bytes in each element of the result.
struct Permute {
  // The SystemZISD opcode that implements this permutation.
  unsigned Opcode;
  // Constant third operand, or element size in bytes (see above).
  unsigned Operand;
  // Byte selector: result byte I comes from input byte Bytes[I], where
  // indices 0-15 select operand 0 and 16-31 select operand 1.
  unsigned char Bytes[SystemZ::VectorBytes];
};
} // end anonymous namespace
4941 | |
// The permutations that single SystemZ merge/pack/permute instructions can
// perform, used to match general vector shuffles against cheap forms.
static const Permute PermuteForms[] = {
  // VMRHG
  { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 8,
    .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VMRHF
  { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 4,
    .Bytes: { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
  // VMRHH
  { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 2,
    .Bytes: { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
  // VMRHB
  { .Opcode: SystemZISD::MERGE_HIGH, .Operand: 1,
    .Bytes: { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
  // VMRLG
  { .Opcode: SystemZISD::MERGE_LOW, .Operand: 8,
    .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
  // VMRLF
  { .Opcode: SystemZISD::MERGE_LOW, .Operand: 4,
    .Bytes: { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
  // VMRLH
  { .Opcode: SystemZISD::MERGE_LOW, .Operand: 2,
    .Bytes: { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
  // VMRLB
  { .Opcode: SystemZISD::MERGE_LOW, .Operand: 1,
    .Bytes: { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
  // VPKG
  { .Opcode: SystemZISD::PACK, .Operand: 4,
    .Bytes: { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
  // VPKF
  { .Opcode: SystemZISD::PACK, .Operand: 2,
    .Bytes: { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
  // VPKH
  { .Opcode: SystemZISD::PACK, .Operand: 1,
    .Bytes: { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
  // VPDI V1, V2, 4 (low half of V1, high half of V2)
  { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 4,
    .Bytes: { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
  // VPDI V1, V2, 1 (high half of V1, low half of V2)
  { .Opcode: SystemZISD::PERMUTE_DWORDS, .Operand: 1,
    .Bytes: { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
};
4983 | |
// Resolve the operand choices made while matching a two-operand shuffle
// against a two-operand pattern.  OpNos[N] holds the shuffle operand that
// must feed pattern operand N, or -1 if that pattern operand is
// unconstrained.  On success, OpNo0 and OpNo1 receive the shuffle operands
// to use for pattern operands 0 and 1, with an unconstrained slot reusing
// the operand chosen for the constrained one.  Returns false only when
// both slots are unconstrained.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  if (OpNos[0] < 0 && OpNos[1] < 0)
    return false;
  OpNo0 = OpNos[0] < 0 ? OpNos[1] : OpNos[0];
  OpNo1 = OpNos[1] < 0 ? OpNos[0] : OpNos[1];
  return true;
}
5004 | |
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Return true if the VPERM can be implemented using P.
// When returning true set OpNo0 to the VPERM operand that should be
// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
//
// For example, if swapping the VPERM operands allows P to match, OpNo0
// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
// operand, but rewriting it to use two duplicated operands allows it to
// match P, then OpNo0 and OpNo1 will be the same.
static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
                         unsigned &OpNo0, unsigned &OpNo1) {
  // OpNos[N] is the VPERM operand that pattern operand N must come from,
  // or -1 if no defined byte has constrained it yet.
  int OpNos[] = { -1, -1 };
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
    int Elt = Bytes[I];
    if (Elt >= 0) {
      // Make sure that the two permute vectors use the same suboperand
      // byte number. Only the operand numbers (the high bits) are
      // allowed to differ.
      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
        return false;
      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
5036 | |
5037 | // As above, but search for a matching permute. |
5038 | static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, |
5039 | unsigned &OpNo0, unsigned &OpNo1) { |
5040 | for (auto &P : PermuteForms) |
5041 | if (matchPermute(Bytes, P, OpNo0, OpNo1)) |
5042 | return &P; |
5043 | return nullptr; |
5044 | } |
5045 | |
5046 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5047 | // undefined bytes. This permute is an operand of an outer permute. |
5048 | // See whether redistributing the -1 bytes gives a shuffle that can be |
5049 | // implemented using P. If so, set Transform to a VPERM-like permute vector |
5050 | // that, when applied to the result of P, gives the original permute in Bytes. |
5051 | static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, |
5052 | const Permute &P, |
5053 | SmallVectorImpl<int> &Transform) { |
5054 | unsigned To = 0; |
5055 | for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { |
5056 | int Elt = Bytes[From]; |
5057 | if (Elt < 0) |
5058 | // Byte number From of the result is undefined. |
5059 | Transform[From] = -1; |
5060 | else { |
5061 | while (P.Bytes[To] != Elt) { |
5062 | To += 1; |
5063 | if (To == SystemZ::VectorBytes) |
5064 | return false; |
5065 | } |
5066 | Transform[From] = To; |
5067 | } |
5068 | } |
5069 | return true; |
5070 | } |
5071 | |
5072 | // As above, but search for a matching permute. |
5073 | static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, |
5074 | SmallVectorImpl<int> &Transform) { |
5075 | for (auto &P : PermuteForms) |
5076 | if (matchDoublePermute(Bytes, P, Transform)) |
5077 | return &P; |
5078 | return nullptr; |
5079 | } |
5080 | |
// Convert the mask of the given shuffle op into a byte-level mask,
// as if it had type vNi8.  Returns false if ShuffleOp is neither a
// VECTOR_SHUFFLE nor a SystemZISD::SPLAT with a constant index.
static bool getVPermMask(SDValue ShuffleOp,
                         SmallVectorImpl<int> &Bytes) {
  EVT VT = ShuffleOp.getValueType();
  unsigned NumElements = VT.getVectorNumElements();
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: ShuffleOp)) {
    // Expand each element index into BytesPerElement consecutive byte
    // indices; undefined (-1) elements leave their bytes as -1.
    Bytes.resize(N: NumElements * BytesPerElement, NV: -1);
    for (unsigned I = 0; I < NumElements; ++I) {
      int Index = VSN->getMaskElt(Idx: I);
      if (Index >= 0)
        for (unsigned J = 0; J < BytesPerElement; ++J)
          Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    }
    return true;
  }
  if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
      isa<ConstantSDNode>(Val: ShuffleOp.getOperand(i: 1))) {
    // A splat selects the bytes of the same source element for every
    // result element.
    unsigned Index = ShuffleOp.getConstantOperandVal(i: 1);
    Bytes.resize(N: NumElements * BytesPerElement, NV: -1);
    for (unsigned I = 0; I < NumElements; ++I)
      for (unsigned J = 0; J < BytesPerElement; ++J)
        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
    return true;
  }
  return false;
}
5110 | |
5111 | // Bytes is a VPERM-like permute vector, except that -1 is used for |
5112 | // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of |
5113 | // the result come from a contiguous sequence of bytes from one input. |
5114 | // Set Base to the selector for the first byte if so. |
5115 | static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, |
5116 | unsigned BytesPerElement, int &Base) { |
5117 | Base = -1; |
5118 | for (unsigned I = 0; I < BytesPerElement; ++I) { |
5119 | if (Bytes[Start + I] >= 0) { |
5120 | unsigned Elem = Bytes[Start + I]; |
5121 | if (Base < 0) { |
5122 | Base = Elem - I; |
5123 | // Make sure the bytes would come from one input operand. |
5124 | if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) |
5125 | return false; |
5126 | } else if (unsigned(Base) != Elem - I) |
5127 | return false; |
5128 | } |
5129 | } |
5130 | return true; |
5131 | } |
5132 | |
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Return true if it can be performed using VSLDB.
// When returning true, set StartIndex to the shift amount and OpNo0
// and OpNo1 to the VPERM operands that should be used as the first
// and second shift operand respectively.
static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
                               unsigned &StartIndex, unsigned &OpNo0,
                               unsigned &OpNo1) {
  // OpNos[N] is the VPERM operand that shift operand N must come from,
  // or -1 if no defined byte has constrained it yet.
  int OpNos[] = { -1, -1 };
  int Shift = -1;
  for (unsigned I = 0; I < 16; ++I) {
    int Index = Bytes[I];
    if (Index >= 0) {
      // Note: I is unsigned, so Index - I is computed in unsigned
      // arithmetic and wraps; since the vector size is a power of two,
      // the % still yields the correct shift amount in [0, 15].
      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
      // Every defined byte must agree on a single shift amount.
      if (Shift < 0)
        Shift = ExpectedShift;
      else if (Shift != ExpectedShift)
        return false;
      // Make sure that the operand mappings are consistent with previous
      // elements.
      if (OpNos[ModelOpNo] == 1 - RealOpNo)
        return false;
      OpNos[ModelOpNo] = RealOpNo;
    }
  }
  StartIndex = Shift;
  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
}
5163 | |
// Create a node that performs P on operands Op0 and Op1, casting the
// operands to the appropriate type. The type of the result is determined by P.
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                              const Permute &P, SDValue Op0, SDValue Op1) {
  // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
  // elements of a PACK are twice as wide as the outputs.
  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
                      P.Operand);
  // Cast both operands to the appropriate type.
  MVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBytes * 8),
                              NumElements: SystemZ::VectorBytes / InBytes);
  Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op0);
  Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op1);
  SDValue Op;
  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
    // PERMUTE_DWORDS takes its selector (P.Operand) as a third operand.
    SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
    Op = DAG.getNode(Opcode: SystemZISD::PERMUTE_DWORDS, DL, VT: InVT, N1: Op0, N2: Op1, N3: Op2);
  } else if (P.Opcode == SystemZISD::PACK) {
    // PACK narrows each element, so the result type is narrower than InVT.
    MVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: P.Operand * 8),
                                 NumElements: SystemZ::VectorBytes / P.Operand);
    Op = DAG.getNode(Opcode: SystemZISD::PACK, DL, VT: OutVT, N1: Op0, N2: Op1);
  } else {
    Op = DAG.getNode(Opcode: P.Opcode, DL, VT: InVT, N1: Op0, N2: Op1);
  }
  return Op;
}
5191 | |
// Return true if N is a constant vector of all zeros, looking through a
// single BITCAST and recognizing SPLAT_VECTOR of a zero constant as well
// as all-zero BUILD_VECTORs.
static bool isZeroVector(SDValue N) {
  if (N->getOpcode() == ISD::BITCAST)
    N = N->getOperand(Num: 0);
  if (N->getOpcode() == ISD::SPLAT_VECTOR)
    if (auto *Op = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 0)))
      return Op->getZExtValue() == 0;
  return ISD::isBuildVectorAllZeros(N: N.getNode());
}
5200 | |
5201 | // Return the index of the zero/undef vector, or UINT32_MAX if not found. |
5202 | static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { |
5203 | for (unsigned I = 0; I < Num ; I++) |
5204 | if (isZeroVector(N: Ops[I])) |
5205 | return I; |
5206 | return UINT32_MAX; |
5207 | } |
5208 | |
// Bytes is a VPERM-like permute vector, except that -1 is used for
// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
// VSLDB or VPERM.
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
                                     SDValue *Ops,
                                     const SmallVectorImpl<int> &Bytes) {
  // Both shuffle forms below operate on byte vectors.
  for (unsigned I = 0; I < 2; ++I)
    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);

  // First see whether VSLDB can be used.
  unsigned StartIndex, OpNo0, OpNo1;
  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
                       Ops[OpNo1],
                       DAG.getTargetConstant(StartIndex, DL, MVT::i32));

  // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
  // eliminate a zero vector by reusing any zero index in the permute vector.
  unsigned ZeroVecIdx = findZeroVectorIdx(Ops: &Ops[0], Num: 2);
  if (ZeroVecIdx != UINT32_MAX) {
    bool MaskFirst = true;
    int ZeroIdx = -1;
    // Find an index into the PERMUTE operands that yields a zero byte, so
    // that the mask itself can stand in for the zero vector operand.
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
      if (OpNo == ZeroVecIdx && I == 0) {
        // If the first byte is zero, use mask as first operand.
        ZeroIdx = 0;
        break;
      }
      if (OpNo != ZeroVecIdx && Byte == 0) {
        // If mask contains a zero, use it by placing that vector first.
        ZeroIdx = I + SystemZ::VectorBytes;
        MaskFirst = false;
        break;
      }
    }
    if (ZeroIdx != -1) {
      // Rewrite the selectors: bytes from the zero vector now read the
      // known-zero position ZeroIdx instead.
      SDValue IndexNodes[SystemZ::VectorBytes];
      for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
        if (Bytes[I] >= 0) {
          unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
          unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
          if (OpNo == ZeroVecIdx)
            IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
          else {
            unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
            IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
          }
        } else
          IndexNodes[I] = DAG.getUNDEF(MVT::i32);
      }
      // The mask is used both as the selector and as one data operand,
      // replacing the zero vector.
      SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
      SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
      if (MaskFirst)
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
                           Mask);
      else
        return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
                           Mask);
    }
  }

  // General case: materialize the permute vector and emit a plain VPERM.
  SDValue IndexNodes[SystemZ::VectorBytes];
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0)
      IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
    else
      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
  SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
                     (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
}
5282 | |
namespace {
// Describes a general N-operand vector shuffle.
struct GeneralShuffle {
  // UnpackFromEltSize == UINT_MAX means no final unpack has been prepared.
  GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
  // Add one element's worth of undefined bytes to the shuffle.
  void addUndef();
  // Add one element, taken from the given element index of the SDValue;
  // returns false if the element cannot be represented.
  bool add(SDValue, unsigned);
  // Build and return the DAG nodes for the completed shuffle.
  SDValue getNode(SelectionDAG &, const SDLoc &);
  // If Bytes matches a zero-extending unpack, remove the zero vector from
  // Ops and record the unpack so getNode() can apply it last.
  void tryPrepareForUnpack();
  // True if tryPrepareForUnpack() found a usable unpack.
  bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
  // Wrap Op in the prepared unpack, if any; otherwise return Op unchanged.
  SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);

  // The operands of the shuffle.
  SmallVector<SDValue, SystemZ::VectorBytes> Ops;

  // Index I is -1 if byte I of the result is undefined. Otherwise the
  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
  // Bytes[I] / SystemZ::VectorBytes.
  SmallVector<int, SystemZ::VectorBytes> Bytes;

  // The type of the shuffle result.
  EVT VT;

  // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
  unsigned UnpackFromEltSize;
};
}
5309 | |
5310 | // Add an extra undefined element to the shuffle. |
5311 | void GeneralShuffle::addUndef() { |
5312 | unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); |
5313 | for (unsigned I = 0; I < BytesPerElement; ++I) |
5314 | Bytes.push_back(Elt: -1); |
5315 | } |
5316 | |
// Add an extra element to the shuffle, taking it from element Elem of Op.
// A null Op indicates a vector input whose value will be calculated later;
// there is at most one such input per shuffle and it always has the same
// type as the result. Aborts and returns false if the source vector elements
// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
// LLVM they become implicitly extended, but this is rare and not optimized.
bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();

  // The source vector can have wider elements than the result,
  // either through an explicit TRUNCATE or because of type legalization.
  // We want the least significant part.
  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();

  // Return false if the source elements are smaller than their destination
  // elements.
  if (FromBytesPerElement < BytesPerElement)
    return false;

  // Byte offset of the least significant BytesPerElement bytes of the
  // source element, modulo the vector size.
  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
                   (FromBytesPerElement - BytesPerElement));

  // Look through things like shuffles and bitcasts.
  while (Op.getNode()) {
    if (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(i: 0);
    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
      // See whether the bytes we need come from a contiguous part of one
      // operand.
      SmallVector<int, SystemZ::VectorBytes> OpBytes;
      if (!getVPermMask(ShuffleOp: Op, Bytes&: OpBytes))
        break;
      int NewByte;
      if (!getShuffleInput(Bytes: OpBytes, Start: Byte, BytesPerElement, Base&: NewByte))
        break;
      if (NewByte < 0) {
        // The shuffle produces undefined bytes here.
        addUndef();
        return true;
      }
      // Chase the bytes into the shuffle's own source operand.
      Op = Op.getOperand(i: unsigned(NewByte) / SystemZ::VectorBytes);
      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
    } else if (Op.isUndef()) {
      addUndef();
      return true;
    } else
      break;
  }

  // Make sure that the source of the extraction is in Ops.
  unsigned OpNo = 0;
  for (; OpNo < Ops.size(); ++OpNo)
    if (Ops[OpNo] == Op)
      break;
  if (OpNo == Ops.size())
    Ops.push_back(Elt: Op);

  // Add the element to Bytes.
  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
  for (unsigned I = 0; I < BytesPerElement; ++I)
    Bytes.push_back(Elt: Base + I);

  return true;
}
5381 | |
// Return SDNodes for the completed shuffle.
SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector" );

  if (Ops.size() == 0)
    return DAG.getUNDEF(VT);

  // Use a single unpack if possible as the last operation.
  tryPrepareForUnpack();

  // Make sure that there are at least two shuffle operands.
  if (Ops.size() == 1)
    Ops.push_back(DAG.getUNDEF(MVT::v16i8));

  // Create a tree of shuffles, deferring root node until after the loop.
  // Try to redistribute the undefined elements of non-root nodes so that
  // the non-root shuffles match something like a pack or merge, then adjust
  // the parent node's permute vector to compensate for the new order.
  // Among other things, this copes with vectors like <2 x i16> that were
  // padded with undefined elements during type legalization.
  //
  // In the best case this redistribution will lead to the whole tree
  // using packs and merges. It should rarely be a loss in other cases.
  unsigned Stride = 1;
  for (; Stride * 2 < Ops.size(); Stride *= 2) {
    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };

      // Create a mask for just these two operands.
      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
        if (OpNo == I)
          NewBytes[J] = Byte;
        else if (OpNo == I + Stride)
          NewBytes[J] = SystemZ::VectorBytes + Byte;
        else
          NewBytes[J] = -1;
      }
      // See if it would be better to reorganize NewMask to avoid using VPERM.
      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
      if (const Permute *P = matchDoublePermute(Bytes: NewBytes, Transform&: NewBytesMap)) {
        Ops[I] = getPermuteNode(DAG, DL, P: *P, Op0: SubOps[0], Op1: SubOps[1]);
        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
          if (NewBytes[J] >= 0) {
            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
                   "Invalid double permute" );
            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
          } else
            assert(NewBytesMap[J] < 0 && "Invalid double permute" );
        }
      } else {
        // Just use NewBytes on the operands.
        Ops[I] = getGeneralPermuteNode(DAG, DL, Ops: SubOps, Bytes: NewBytes);
        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
          if (NewBytes[J] >= 0)
            Bytes[J] = I * SystemZ::VectorBytes + J;
      }
    }
  }

  // Now we just have 2 inputs. Put the second operand in Ops[1].
  if (Stride > 1) {
    Ops[1] = Ops[Stride];
    // Renumber byte selectors so they refer to Ops[0] and Ops[1] only.
    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
      if (Bytes[I] >= int(SystemZ::VectorBytes))
        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
  }

  // Look for an instruction that can do the permute without resorting
  // to VPERM.
  unsigned OpNo0, OpNo1;
  SDValue Op;
  if (unpackWasPrepared() && Ops[1].isUndef())
    Op = Ops[0];
  else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
    Op = getPermuteNode(DAG, DL, P: *P, Op0: Ops[OpNo0], Op1: Ops[OpNo1]);
  else
    Op = getGeneralPermuteNode(DAG, DL, Ops: &Ops[0], Bytes);

  // Emit the deferred final unpack, if one was prepared.
  Op = insertUnpackIfPrepared(DAG, DL, Op);

  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
}
5468 | |
5469 | #ifndef NDEBUG |
5470 | static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { |
5471 | dbgs() << Msg.c_str() << " { " ; |
5472 | for (unsigned i = 0; i < Bytes.size(); i++) |
5473 | dbgs() << Bytes[i] << " " ; |
5474 | dbgs() << "}\n" ; |
5475 | } |
5476 | #endif |
5477 | |
// If the Bytes vector matches an unpack operation, prepare to do the unpack
// after all else by removing the zero vector and the effect of the unpack on
// Bytes.
void GeneralShuffle::tryPrepareForUnpack() {
  uint32_t ZeroVecOpNo = findZeroVectorIdx(Ops: &Ops[0], Num: Ops.size());
  // Nothing to do without a zero vector, or when the zero vector is the
  // only operand.
  if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
    return;

  // Only do this if removing the zero vector reduces the depth, otherwise
  // the critical path will increase with the final unpack.
  if (Ops.size() > 2 &&
      Log2_32_Ceil(Value: Ops.size()) == Log2_32_Ceil(Value: Ops.size() - 1))
    return;

  // Find an unpack that would allow removing the zero vector from Ops.
  UnpackFromEltSize = 1;
  for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
    bool MatchUnpack = true;
    SmallVector<int, SystemZ::VectorBytes> SrcBytes;
    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
      unsigned ToEltSize = UnpackFromEltSize * 2;
      // For a zero-extending unpack to match, the first UnpackFromEltSize
      // bytes of each destination element must come from the zero vector,
      // and all remaining defined bytes must not.
      bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
      if (!IsZextByte)
        SrcBytes.push_back(Elt: Bytes[Elt]);
      if (Bytes[Elt] != -1) {
        unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
        if (IsZextByte != (OpNo == ZeroVecOpNo)) {
          MatchUnpack = false;
          break;
        }
      }
    }
    if (MatchUnpack) {
      if (Ops.size() == 2) {
        // Don't use unpack if a single source operand needs rearrangement.
        for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
          if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
            UnpackFromEltSize = UINT_MAX;
            return;
          }
      }
      break;
    }
  }
  // No element size matched: leave UnpackFromEltSize > 4 so that
  // unpackWasPrepared() stays false.
  if (UnpackFromEltSize > 4)
    return;

  LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
                    << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
                    << ".\n" ;
             dumpBytes(Bytes, "Original Bytes vector:" ););

  // Apply the unpack in reverse to the Bytes array.
  unsigned B = 0;
  for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
    // Skip the zero-extension bytes and compact the source bytes to the
    // front of the vector; the tail becomes undefined.
    Elt += UnpackFromEltSize;
    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
      Bytes[B] = Bytes[Elt];
  }
  while (B < SystemZ::VectorBytes)
    Bytes[B++] = -1;

  // Remove the zero vector from Ops
  Ops.erase(CI: &Ops[ZeroVecOpNo]);
  // Renumber byte selectors that referred to operands after the erased one.
  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
    if (Bytes[I] >= 0) {
      unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
      if (OpNo > ZeroVecOpNo)
        Bytes[I] -= SystemZ::VectorBytes;
    }

  LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:" );
             dbgs() << "\n" ;);
}
5552 | |
// If tryPrepareForUnpack() recorded an unpack, bitcast Op to the narrower
// source element type and emit the widening UNPACKL_HIGH; otherwise return
// Op unchanged.
SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
                                               const SDLoc &DL,
                                               SDValue Op) {
  if (!unpackWasPrepared())
    return Op;
  unsigned InBits = UnpackFromEltSize * 8;
  // View Op as a vector with the unpack's source element width.
  EVT InVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: InBits),
                              NumElements: SystemZ::VectorBits / InBits);
  SDValue PackedOp = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: InVT, Operand: Op);
  // The unpack doubles the element width.
  unsigned OutBits = InBits * 2;
  EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: OutBits),
                               NumElements: SystemZ::VectorBits / OutBits);
  return DAG.getNode(Opcode: SystemZISD::UNPACKL_HIGH, DL, VT: OutVT, Operand: PackedOp);
}
5567 | |
5568 | // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. |
5569 | static bool isScalarToVector(SDValue Op) { |
5570 | for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) |
5571 | if (!Op.getOperand(i: I).isUndef()) |
5572 | return false; |
5573 | return true; |
5574 | } |
5575 | |
5576 | // Return a vector of type VT that contains Value in the first element. |
5577 | // The other elements don't matter. |
5578 | static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
5579 | SDValue Value) { |
5580 | // If we have a constant, replicate it to all elements and let the |
5581 | // BUILD_VECTOR lowering take care of it. |
5582 | if (Value.getOpcode() == ISD::Constant || |
5583 | Value.getOpcode() == ISD::ConstantFP) { |
5584 | SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); |
5585 | return DAG.getBuildVector(VT, DL, Ops); |
5586 | } |
5587 | if (Value.isUndef()) |
5588 | return DAG.getUNDEF(VT); |
5589 | return DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT, Operand: Value); |
5590 | } |
5591 | |
5592 | // Return a vector of type VT in which Op0 is in element 0 and Op1 is in |
5593 | // element 1. Used for cases in which replication is cheap. |
5594 | static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, |
5595 | SDValue Op0, SDValue Op1) { |
5596 | if (Op0.isUndef()) { |
5597 | if (Op1.isUndef()) |
5598 | return DAG.getUNDEF(VT); |
5599 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op1); |
5600 | } |
5601 | if (Op1.isUndef()) |
5602 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0); |
5603 | return DAG.getNode(Opcode: SystemZISD::MERGE_HIGH, DL, VT, |
5604 | N1: buildScalarToVector(DAG, DL, VT, Value: Op0), |
5605 | N2: buildScalarToVector(DAG, DL, VT, Value: Op1)); |
5606 | } |
5607 | |
5608 | // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 |
5609 | // vector for them. |
5610 | static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, |
5611 | SDValue Op1) { |
5612 | if (Op0.isUndef() && Op1.isUndef()) |
5613 | return DAG.getUNDEF(MVT::v2i64); |
5614 | // If one of the two inputs is undefined then replicate the other one, |
5615 | // in order to avoid using another register unnecessarily. |
5616 | if (Op0.isUndef()) |
5617 | Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); |
5618 | else if (Op1.isUndef()) |
5619 | Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
5620 | else { |
5621 | Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); |
5622 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); |
5623 | } |
5624 | return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); |
5625 | } |
5626 | |
// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
// would benefit from this representation and return it if so.
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
                                     BuildVectorSDNode *BVN) {
  EVT VT = BVN->getValueType(ResNo: 0);
  unsigned NumElements = VT.getVectorNumElements();

  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
  // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
  // need a BUILD_VECTOR, add an additional placeholder operand for that
  // BUILD_VECTOR and store its operands in ResidueOps.
  GeneralShuffle GS(VT);
  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
  bool FoundOne = false;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Op = BVN->getOperand(Num: I);
    // Look through a TRUNCATE; GeneralShuffle::add takes the least
    // significant part of wider source elements anyway.
    if (Op.getOpcode() == ISD::TRUNCATE)
      Op = Op.getOperand(i: 0);
    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        Op.getOperand(i: 1).getOpcode() == ISD::Constant) {
      unsigned Elem = Op.getConstantOperandVal(i: 1);
      if (!GS.add(Op: Op.getOperand(i: 0), Elem))
        return SDValue();
      FoundOne = true;
    } else if (Op.isUndef()) {
      GS.addUndef();
    } else {
      // Null SDValue marks the placeholder operand; Elem indexes into
      // the residual BUILD_VECTOR being collected.
      if (!GS.add(Op: SDValue(), Elem: ResidueOps.size()))
        return SDValue();
      ResidueOps.push_back(Elt: BVN->getOperand(Num: I));
    }
  }

  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
  if (!FoundOne)
    return SDValue();

  // Create the BUILD_VECTOR for the remaining elements, if any.
  if (!ResidueOps.empty()) {
    while (ResidueOps.size() < NumElements)
      ResidueOps.push_back(Elt: DAG.getUNDEF(VT: ResidueOps[0].getValueType()));
    // Replace the (single) placeholder operand with the residual
    // BUILD_VECTOR.
    for (auto &Op : GS.Ops) {
      if (!Op.getNode()) {
        Op = DAG.getBuildVector(VT, DL: SDLoc(BVN), Ops: ResidueOps);
        break;
      }
    }
  }
  return GS.getNode(DAG, DL: SDLoc(BVN));
}
5679 | |
// Return true if Op is a load that vector element instructions can use
// directly: an unindexed normal load, an atomic load, or — when the
// vector-enhancements-2 facility is available — a byte-reversed load
// (SystemZISD::LRV).
bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
  if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Val&: Op)->isUnindexed())
    return true;
  if (auto *AL = dyn_cast<AtomicSDNode>(Val&: Op))
    if (AL->getOpcode() == ISD::ATOMIC_LOAD)
      return true;
  if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
    return true;
  return false;
}
5690 | |
// Combine GPR scalar values Elems into a vector of type VT.  Cheaper forms
// are tried first: replicating a single repeated value, joining two dwords
// with VLVGP, merging FPRs, starting from a BUILD_VECTOR of the constant
// elements or from a replicated load, and finally inserting any remaining
// elements one at a time with VLVGx.
SDValue
SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                   SmallVectorImpl<SDValue> &Elems) const {
  // See whether there is a single replicated value.
  SDValue Single;
  unsigned int NumElements = Elems.size();
  // Number of defined (non-undef) elements.
  unsigned int Count = 0;
  for (auto Elem : Elems) {
    if (!Elem.isUndef()) {
      if (!Single.getNode())
        Single = Elem;
      else if (Elem != Single) {
        // Two distinct defined values: not a replication.
        Single = SDValue();
        break;
      }
      Count += 1;
    }
  }
  // There are three cases here:
  //
  // - if the only defined element is a loaded one, the best sequence
  //   is a replicating load.
  //
  // - otherwise, if the only defined element is an i64 value, we will
  //   end up with the same VLVGP sequence regardless of whether we short-cut
  //   for replication or fall through to the later code.
  //
  // - otherwise, if the only defined element is an i32 or smaller value,
  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
  //   This is only a win if the single defined element is used more than once.
  //   In other cases we're better off using a single VLVGx.
  if (Single.getNode() && (Count > 1 || isVectorElementLoad(Op: Single)))
    return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Single);

  // If all elements are loads, use VLREP/VLEs (below).
  bool AllLoads = true;
  for (auto Elem : Elems)
    if (!isVectorElementLoad(Op: Elem)) {
      AllLoads = false;
      break;
    }

  // The best way of building a v2i64 from two i64s is to use VLVGP.
  if (VT == MVT::v2i64 && !AllLoads)
    return joinDwords(DAG, DL, Op0: Elems[0], Op1: Elems[1]);

  // Use a 64-bit merge high to combine two doubles.
  if (VT == MVT::v2f64 && !AllLoads)
    return buildMergeScalars(DAG, DL, VT, Op0: Elems[0], Op1: Elems[1]);

  // Build v4f32 values directly from the FPRs:
  //
  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
  //        V            V          VMRHF
  //      <ABxx>       <CDxx>
  //             V                  VMRHG
  //           <ABCD>
  if (VT == MVT::v4f32 && !AllLoads) {
    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Op0: Elems[0], Op1: Elems[1]);
    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Op0: Elems[2], Op1: Elems[3]);
    // Avoid unnecessary undefs by reusing the other operand.
    if (Op01.isUndef())
      Op01 = Op23;
    else if (Op23.isUndef())
      Op23 = Op01;
    // Merging identical replications is a no-op.
    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
      return Op01;
    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
                             DL, MVT::v2i64, Op01, Op23);
    return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
  }

  // Collect the constant terms.
  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);

  unsigned NumConstants = 0;
  for (unsigned I = 0; I < NumElements; ++I) {
    SDValue Elem = Elems[I];
    if (Elem.getOpcode() == ISD::Constant ||
        Elem.getOpcode() == ISD::ConstantFP) {
      NumConstants += 1;
      Constants[I] = Elem;
      Done[I] = true;
    }
  }
  // If there was at least one constant, fill in the other elements of
  // Constants with undefs to get a full vector constant and use that
  // as the starting point.
  SDValue Result;
  SDValue ReplicatedVal;
  if (NumConstants > 0) {
    for (unsigned I = 0; I < NumElements; ++I)
      if (!Constants[I].getNode())
        Constants[I] = DAG.getUNDEF(VT: Elems[I].getValueType());
    Result = DAG.getBuildVector(VT, DL, Ops: Constants);
  } else {
    // Otherwise try to use VLREP or VLVGP to start the sequence in order to
    // avoid a false dependency on any previous contents of the vector
    // register.

    // Use a VLREP if at least one element is a load. Make sure to replicate
    // the load with the most elements having its value.
    std::map<const SDNode*, unsigned> UseCounts;
    SDNode *LoadMaxUses = nullptr;
    for (unsigned I = 0; I < NumElements; ++I)
      if (isVectorElementLoad(Op: Elems[I])) {
        SDNode *Ld = Elems[I].getNode();
        UseCounts[Ld]++;
        if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
          LoadMaxUses = Ld;
      }
    if (LoadMaxUses != nullptr) {
      ReplicatedVal = SDValue(LoadMaxUses, 0);
      Result = DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: ReplicatedVal);
    } else {
      // Try to use VLVGP.
      unsigned I1 = NumElements / 2 - 1;
      unsigned I2 = NumElements - 1;
      bool Def1 = !Elems[I1].isUndef();
      bool Def2 = !Elems[I2].isUndef();
      if (Def1 || Def2) {
        // If only one of the two slots is defined, use its value for both
        // VLVGP operands.
        SDValue Elem1 = Elems[Def1 ? I1 : I2];
        SDValue Elem2 = Elems[Def2 ? I2 : I1];
        Result = DAG.getNode(Opcode: ISD::BITCAST, DL, VT,
                             Operand: joinDwords(DAG, DL, Op0: Elem1, Op1: Elem2));
        Done[I1] = true;
        Done[I2] = true;
      } else
        Result = DAG.getUNDEF(VT);
    }
  }

  // Use VLVGx to insert the other elements.  Elements equal to the
  // replicated load are already in place.
  for (unsigned I = 0; I < NumElements; ++I)
    if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
                           DAG.getConstant(I, DL, MVT::i32));
  return Result;
}
5835 | |
5836 | SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, |
5837 | SelectionDAG &DAG) const { |
5838 | auto *BVN = cast<BuildVectorSDNode>(Val: Op.getNode()); |
5839 | SDLoc DL(Op); |
5840 | EVT VT = Op.getValueType(); |
5841 | |
5842 | if (BVN->isConstant()) { |
5843 | if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) |
5844 | return Op; |
5845 | |
5846 | // Fall back to loading it from memory. |
5847 | return SDValue(); |
5848 | } |
5849 | |
5850 | // See if we should use shuffles to construct the vector from other vectors. |
5851 | if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) |
5852 | return Res; |
5853 | |
5854 | // Detect SCALAR_TO_VECTOR conversions. |
5855 | if (isOperationLegal(Op: ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) |
5856 | return buildScalarToVector(DAG, DL, VT, Value: Op.getOperand(i: 0)); |
5857 | |
5858 | // Otherwise use buildVector to build the vector up from GPRs. |
5859 | unsigned NumElements = Op.getNumOperands(); |
5860 | SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); |
5861 | for (unsigned I = 0; I < NumElements; ++I) |
5862 | Ops[I] = Op.getOperand(i: I); |
5863 | return buildVector(DAG, DL, VT, Elems&: Ops); |
5864 | } |
5865 | |
5866 | SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, |
5867 | SelectionDAG &DAG) const { |
5868 | auto *VSN = cast<ShuffleVectorSDNode>(Val: Op.getNode()); |
5869 | SDLoc DL(Op); |
5870 | EVT VT = Op.getValueType(); |
5871 | unsigned NumElements = VT.getVectorNumElements(); |
5872 | |
5873 | if (VSN->isSplat()) { |
5874 | SDValue Op0 = Op.getOperand(i: 0); |
5875 | unsigned Index = VSN->getSplatIndex(); |
5876 | assert(Index < VT.getVectorNumElements() && |
5877 | "Splat index should be defined and in first operand" ); |
5878 | // See whether the value we're splatting is directly available as a scalar. |
5879 | if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || |
5880 | Op0.getOpcode() == ISD::BUILD_VECTOR) |
5881 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL, VT, Operand: Op0.getOperand(i: Index)); |
5882 | // Otherwise keep it as a vector-to-vector operation. |
5883 | return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), |
5884 | DAG.getTargetConstant(Index, DL, MVT::i32)); |
5885 | } |
5886 | |
5887 | GeneralShuffle GS(VT); |
5888 | for (unsigned I = 0; I < NumElements; ++I) { |
5889 | int Elt = VSN->getMaskElt(Idx: I); |
5890 | if (Elt < 0) |
5891 | GS.addUndef(); |
5892 | else if (!GS.add(Op: Op.getOperand(i: unsigned(Elt) / NumElements), |
5893 | Elem: unsigned(Elt) % NumElements)) |
5894 | return SDValue(); |
5895 | } |
5896 | return GS.getNode(DAG, DL: SDLoc(VSN)); |
5897 | } |
5898 | |
5899 | SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, |
5900 | SelectionDAG &DAG) const { |
5901 | SDLoc DL(Op); |
5902 | // Just insert the scalar into element 0 of an undefined vector. |
5903 | return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, |
5904 | Op.getValueType(), DAG.getUNDEF(Op.getValueType()), |
5905 | Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); |
5906 | } |
5907 | |
5908 | SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, |
5909 | SelectionDAG &DAG) const { |
5910 | // Handle insertions of floating-point values. |
5911 | SDLoc DL(Op); |
5912 | SDValue Op0 = Op.getOperand(i: 0); |
5913 | SDValue Op1 = Op.getOperand(i: 1); |
5914 | SDValue Op2 = Op.getOperand(i: 2); |
5915 | EVT VT = Op.getValueType(); |
5916 | |
5917 | // Insertions into constant indices of a v2f64 can be done using VPDI. |
5918 | // However, if the inserted value is a bitcast or a constant then it's |
5919 | // better to use GPRs, as below. |
5920 | if (VT == MVT::v2f64 && |
5921 | Op1.getOpcode() != ISD::BITCAST && |
5922 | Op1.getOpcode() != ISD::ConstantFP && |
5923 | Op2.getOpcode() == ISD::Constant) { |
5924 | uint64_t Index = Op2->getAsZExtVal(); |
5925 | unsigned Mask = VT.getVectorNumElements() - 1; |
5926 | if (Index <= Mask) |
5927 | return Op; |
5928 | } |
5929 | |
5930 | // Otherwise bitcast to the equivalent integer form and insert via a GPR. |
5931 | MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getScalarSizeInBits()); |
5932 | MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VT.getVectorNumElements()); |
5933 | SDValue Res = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: IntVecVT, |
5934 | N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), |
5935 | N2: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVT, Operand: Op1), N3: Op2); |
5936 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Res); |
5937 | } |
5938 | |
5939 | SDValue |
5940 | SystemZTargetLowering::(SDValue Op, |
5941 | SelectionDAG &DAG) const { |
5942 | // Handle extractions of floating-point values. |
5943 | SDLoc DL(Op); |
5944 | SDValue Op0 = Op.getOperand(i: 0); |
5945 | SDValue Op1 = Op.getOperand(i: 1); |
5946 | EVT VT = Op.getValueType(); |
5947 | EVT VecVT = Op0.getValueType(); |
5948 | |
5949 | // Extractions of constant indices can be done directly. |
5950 | if (auto *CIndexN = dyn_cast<ConstantSDNode>(Val&: Op1)) { |
5951 | uint64_t Index = CIndexN->getZExtValue(); |
5952 | unsigned Mask = VecVT.getVectorNumElements() - 1; |
5953 | if (Index <= Mask) |
5954 | return Op; |
5955 | } |
5956 | |
5957 | // Otherwise bitcast to the equivalent integer form and extract via a GPR. |
5958 | MVT IntVT = MVT::getIntegerVT(BitWidth: VT.getSizeInBits()); |
5959 | MVT IntVecVT = MVT::getVectorVT(VT: IntVT, NumElements: VecVT.getVectorNumElements()); |
5960 | SDValue Res = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: IntVT, |
5961 | N1: DAG.getNode(Opcode: ISD::BITCAST, DL, VT: IntVecVT, Operand: Op0), N2: Op1); |
5962 | return DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Res); |
5963 | } |
5964 | |
5965 | SDValue SystemZTargetLowering:: |
5966 | lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { |
5967 | SDValue PackedOp = Op.getOperand(i: 0); |
5968 | EVT OutVT = Op.getValueType(); |
5969 | EVT InVT = PackedOp.getValueType(); |
5970 | unsigned ToBits = OutVT.getScalarSizeInBits(); |
5971 | unsigned FromBits = InVT.getScalarSizeInBits(); |
5972 | do { |
5973 | FromBits *= 2; |
5974 | EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: FromBits), |
5975 | NumElements: SystemZ::VectorBits / FromBits); |
5976 | PackedOp = |
5977 | DAG.getNode(Opcode: SystemZISD::UNPACK_HIGH, DL: SDLoc(PackedOp), VT: OutVT, Operand: PackedOp); |
5978 | } while (FromBits != ToBits); |
5979 | return PackedOp; |
5980 | } |
5981 | |
// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
// Each wide output element is assembled (still on the narrow element type)
// from NumInPerOut - 1 zero elements followed by one source element; the
// result is then bitcast to the output type.  Placing the zeros first
// corresponds to zero-extension given SystemZ's big-endian element layout.
SDValue SystemZTargetLowering::
lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
  SDValue PackedOp = Op.getOperand(i: 0);
  SDLoc DL(Op);
  EVT OutVT = Op.getValueType();
  EVT InVT = PackedOp.getValueType();
  unsigned InNumElts = InVT.getVectorNumElements();
  unsigned OutNumElts = OutVT.getVectorNumElements();
  // Number of narrow input elements making up one wide output element.
  unsigned NumInPerOut = InNumElts / OutNumElts;

  // An all-zero vector to draw the zero parts from (shuffle operand 2,
  // mask indices InNumElts and above).
  SDValue ZeroVec =
    DAG.getSplatVector(VT: InVT, DL, Op: DAG.getConstant(Val: 0, DL, VT: InVT.getScalarType()));

  SmallVector<int, 16> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts;
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    // NumInPerOut - 1 zeros, then the source element itself.
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  SDValue Shuf = DAG.getVectorShuffle(VT: InVT, dl: DL, N1: PackedOp, N2: ZeroVec, Mask);
  return DAG.getNode(Opcode: ISD::BITCAST, DL, VT: OutVT, Operand: Shuf);
}
6008 | |
// Lower a vector SHL/SRL/SRA/ROTL node Op.  ByScalar is the corresponding
// SystemZISD *_BY_SCALAR opcode, usable when every lane is shifted by the
// same amount.
SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
                                          unsigned ByScalar) const {
  // Look for cases where a vector shift can use the *_BY_SCALAR form.
  SDValue Op0 = Op.getOperand(i: 0);
  SDValue Op1 = Op.getOperand(i: 1);
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  unsigned ElemBitSize = VT.getScalarSizeInBits();

  // See whether the shift vector is a splat represented as BUILD_VECTOR.
  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Val&: Op1)) {
    APInt SplatBits, SplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    // Check for constant splats. Use ElemBitSize as the minimum element
    // width and reject splats that need wider elements.
    if (BVN->isConstantSplat(SplatValue&: SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                             MinSplatBits: ElemBitSize, isBigEndian: true) &&
        SplatBitSize == ElemBitSize) {
      // Mask the amount to 12 bits -- presumably to keep the immediate
      // within the instruction's displacement field; TODO confirm.
      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
                                      DL, MVT::i32);
      return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
    }
    // Check for variable splats.
    BitVector UndefElements;
    SDValue Splat = BVN->getSplatValue(UndefElements: &UndefElements);
    if (Splat) {
      // Since i32 is the smallest legal type, we either need a no-op
      // or a truncation.
      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
      return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
    }
  }

  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
  // and the shift amount is directly available in a GPR.
  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Val&: Op1)) {
    if (VSN->isSplat()) {
      SDValue VSNOp0 = VSN->getOperand(Num: 0);
      unsigned Index = VSN->getSplatIndex();
      assert(Index < VT.getVectorNumElements() &&
             "Splat index should be defined and in first operand" );
      // The splatted scalar must be visible in the shuffle's source
      // (a SCALAR_TO_VECTOR's element 0 or any BUILD_VECTOR operand).
      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
        // Since i32 is the smallest legal type, we either need a no-op
        // or a truncation.
        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                                    VSNOp0.getOperand(Index));
        return DAG.getNode(Opcode: ByScalar, DL, VT, N1: Op0, N2: Shift);
      }
    }
  }

  // Otherwise just treat the current form as legal.
  return Op;
}
6065 | |
6066 | SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op, |
6067 | SelectionDAG &DAG) const { |
6068 | SDLoc DL(Op); |
6069 | MVT ResultVT = Op.getSimpleValueType(); |
6070 | SDValue Arg = Op.getOperand(i: 0); |
6071 | unsigned Check = Op.getConstantOperandVal(i: 1); |
6072 | |
6073 | unsigned TDCMask = 0; |
6074 | if (Check & fcSNan) |
6075 | TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS; |
6076 | if (Check & fcQNan) |
6077 | TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS; |
6078 | if (Check & fcPosInf) |
6079 | TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS; |
6080 | if (Check & fcNegInf) |
6081 | TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS; |
6082 | if (Check & fcPosNormal) |
6083 | TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS; |
6084 | if (Check & fcNegNormal) |
6085 | TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS; |
6086 | if (Check & fcPosSubnormal) |
6087 | TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS; |
6088 | if (Check & fcNegSubnormal) |
6089 | TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS; |
6090 | if (Check & fcPosZero) |
6091 | TDCMask |= SystemZ::TDCMASK_ZERO_PLUS; |
6092 | if (Check & fcNegZero) |
6093 | TDCMask |= SystemZ::TDCMASK_ZERO_MINUS; |
6094 | SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64); |
6095 | |
6096 | SDValue Intr = DAG.getNode(Opcode: SystemZISD::TDC, DL, VT: ResultVT, N1: Arg, N2: TDCMaskV); |
6097 | return getCCResult(DAG, CCReg: Intr); |
6098 | } |
6099 | |
// Lower READCYCLECOUNTER by storing the TOD clock (via SystemZISD::STCKF)
// into a stack temporary and loading the i64 result back.
SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Chain = Op.getOperand(i: 0);

  // STCKF only supports a memory operand, so we have to use a temporary.
  SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
  int SPFI = cast<FrameIndexSDNode>(Val: StackPtr.getNode())->getIndex();
  MachinePointerInfo MPI =
    MachinePointerInfo::getFixedStack(MF&: DAG.getMachineFunction(), FI: SPFI);

  // Use STCKF to store the TOD clock into the temporary.
  SDValue StoreOps[] = {Chain, StackPtr};
  Chain = DAG.getMemIntrinsicNode(
      SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
      MPI, MaybeAlign(), MachineMemOperand::MOStore);

  // And read it back from there.
  return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
}
6120 | |
// Dispatch to the SystemZ-specific handler for each operation that the
// target marked as Custom.  Any opcode reaching the default case was
// wrongly marked Custom without a handler here.
SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
                                              SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG);
  case ISD::BR_CC:
    return lowerBR_CC(Op, DAG);
  case ISD::SELECT_CC:
    return lowerSELECT_CC(Op, DAG);
  case ISD::SETCC:
    return lowerSETCC(Op, DAG);
  case ISD::STRICT_FSETCC:
    return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: false);
  case ISD::STRICT_FSETCCS:
    return lowerSTRICT_FSETCC(Op, DAG, IsSignaling: true);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Node: cast<GlobalAddressSDNode>(Val&: Op), DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Node: cast<BlockAddressSDNode>(Val&: Op), DAG);
  case ISD::JumpTable:
    return lowerJumpTable(JT: cast<JumpTableSDNode>(Val&: Op), DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(CP: cast<ConstantPoolSDNode>(Val&: Op), DAG);
  case ISD::BITCAST:
    return lowerBITCAST(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VACOPY:
    return lowerVACOPY(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
  case ISD::SMUL_LOHI:
    return lowerSMUL_LOHI(Op, DAG);
  case ISD::UMUL_LOHI:
    return lowerUMUL_LOHI(Op, DAG);
  case ISD::SDIVREM:
    return lowerSDIVREM(Op, DAG);
  case ISD::UDIVREM:
    return lowerUDIVREM(Op, DAG);
  case ISD::SADDO:
  case ISD::SSUBO:
  case ISD::UADDO:
  case ISD::USUBO:
    return lowerXALUO(Op, DAG);
  case ISD::UADDO_CARRY:
  case ISD::USUBO_CARRY:
    return lowerUADDSUBO_CARRY(Op, DAG);
  case ISD::OR:
    return lowerOR(Op, DAG);
  case ISD::CTPOP:
    return lowerCTPOP(Op, DAG);
  case ISD::VECREDUCE_ADD:
    return lowerVECREDUCE_ADD(Op, DAG);
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_SWAPW);
  case ISD::ATOMIC_STORE:
  case ISD::ATOMIC_LOAD:
    return lowerATOMIC_LDST_I128(Op, DAG);
  case ISD::ATOMIC_LOAD_ADD:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_ADD);
  case ISD::ATOMIC_LOAD_SUB:
    return lowerATOMIC_LOAD_SUB(Op, DAG);
  case ISD::ATOMIC_LOAD_AND:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_AND);
  case ISD::ATOMIC_LOAD_OR:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_OR);
  case ISD::ATOMIC_LOAD_XOR:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_XOR);
  case ISD::ATOMIC_LOAD_NAND:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_NAND);
  case ISD::ATOMIC_LOAD_MIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MIN);
  case ISD::ATOMIC_LOAD_MAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_MAX);
  case ISD::ATOMIC_LOAD_UMIN:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMIN);
  case ISD::ATOMIC_LOAD_UMAX:
    return lowerATOMIC_LOAD_OP(Op, DAG, Opcode: SystemZISD::ATOMIC_LOADW_UMAX);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return lowerATOMIC_CMP_SWAP(Op, DAG);
  case ISD::STACKSAVE:
    return lowerSTACKSAVE(Op, DAG);
  case ISD::STACKRESTORE:
    return lowerSTACKRESTORE(Op, DAG);
  case ISD::PREFETCH:
    return lowerPREFETCH(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN:
    return lowerINTRINSIC_W_CHAIN(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return lowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return lowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
  case ISD::SHL:
    return lowerShift(Op, DAG, ByScalar: SystemZISD::VSHL_BY_SCALAR);
  case ISD::SRL:
    return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRL_BY_SCALAR);
  case ISD::SRA:
    return lowerShift(Op, DAG, ByScalar: SystemZISD::VSRA_BY_SCALAR);
  case ISD::ROTL:
    return lowerShift(Op, DAG, ByScalar: SystemZISD::VROTL_BY_SCALAR);
  case ISD::IS_FPCLASS:
    return lowerIS_FPCLASS(Op, DAG);
  case ISD::GET_ROUNDING:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::READCYCLECOUNTER:
    return lowerREADCYCLECOUNTER(Op, DAG);
  default:
    llvm_unreachable("Unexpected node to lower" );
  }
}
6251 | |
// Lower operations with invalid operand or result types (currently used
// only for 128-bit integer types).  Each case pushes the legalized result
// value(s) -- and the chain, where applicable -- onto Results.
void
SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::ATOMIC_LOAD: {
    // 128-bit atomic load: emit ATOMIC_LOAD_128 (untyped GR128 result)
    // and convert the value back to i128.
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
    SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    Results.push_back(Elt: lowerGR128ToI128(DAG, In: Res));
    Results.push_back(Elt: Res.getValue(R: 1));
    break;
  }
  case ISD::ATOMIC_STORE: {
    // 128-bit atomic store: convert the i128 value to a GR128 pair and
    // emit ATOMIC_STORE_128.
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {N->getOperand(Num: 0), lowerI128ToGR128(DAG, In: N->getOperand(Num: 1)),
                     N->getOperand(Num: 2)};
    MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
        AtomicOrdering::SequentiallyConsistent)
      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
                                       MVT::Other, Res), 0);
    Results.push_back(Elt: Res);
    break;
  }
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
    // 128-bit compare-and-swap: results are the loaded value, a success
    // flag derived from the CC output, and the chain.
    SDLoc DL(N);
    SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
    SDValue Ops[] = { N->getOperand(Num: 0), N->getOperand(Num: 1),
                      lowerI128ToGR128(DAG, In: N->getOperand(Num: 2)),
                      lowerI128ToGR128(DAG, In: N->getOperand(Num: 3)) };
    MachineMemOperand *MMO = cast<AtomicSDNode>(Val: N)->getMemOperand();
    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
                                          DL, Tys, Ops, MVT::i128, MMO);
    SDValue Success = emitSETCC(DAG, DL, CCReg: Res.getValue(R: 1),
                                CCValid: SystemZ::CCMASK_CS, CCMask: SystemZ::CCMASK_CS_EQ);
    Success = DAG.getZExtOrTrunc(Op: Success, DL, VT: N->getValueType(ResNo: 1));
    Results.push_back(Elt: lowerGR128ToI128(DAG, In: Res));
    Results.push_back(Elt: Success);
    Results.push_back(Elt: Res.getValue(R: 2));
    break;
  }
  case ISD::BITCAST: {
    // f128 -> i128 bitcast: split into two i64 halves according to the
    // register class holding the f128 value, then rejoin via BUILD_PAIR.
    SDValue Src = N->getOperand(Num: 0);
    if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
        !useSoftFloat()) {
      SDLoc DL(N);
      SDValue Lo, Hi;
      if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
        // f128 lives in a vector register: extract the two i64 elements.
        SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
        Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
                         DAG.getConstant(1, DL, MVT::i32));
        Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
                         DAG.getConstant(0, DL, MVT::i32));
      } else {
        // f128 lives in an FP register pair: take the 64-bit subregisters.
        assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
               "Unrecognized register class for f128." );
        SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                                  DL, MVT::f64, Src);
        SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                                  DL, MVT::f64, Src);
        Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
        Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
      }
      Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
    }
    break;
  }
  default:
    llvm_unreachable("Unexpected node to lower" );
  }
}
6334 | |
6335 | void |
6336 | SystemZTargetLowering::ReplaceNodeResults(SDNode *N, |
6337 | SmallVectorImpl<SDValue> &Results, |
6338 | SelectionDAG &DAG) const { |
6339 | return LowerOperationWrapper(N, Results, DAG); |
6340 | } |
6341 | |
// Return the textual name of a SystemZISD target opcode for debug dumps,
// or null for opcodes that are not SystemZ-specific.
const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
  switch ((SystemZISD::NodeType)Opcode) {
    case SystemZISD::FIRST_NUMBER: break;
    OPCODE(RET_GLUE);
    OPCODE(CALL);
    OPCODE(SIBCALL);
    OPCODE(TLS_GDCALL);
    OPCODE(TLS_LDCALL);
    OPCODE(PCREL_WRAPPER);
    OPCODE(PCREL_OFFSET);
    OPCODE(ICMP);
    OPCODE(FCMP);
    OPCODE(STRICT_FCMP);
    OPCODE(STRICT_FCMPS);
    OPCODE(TM);
    OPCODE(BR_CCMASK);
    OPCODE(SELECT_CCMASK);
    OPCODE(ADJDYNALLOC);
    OPCODE(PROBED_ALLOCA);
    OPCODE(POPCNT);
    OPCODE(SMUL_LOHI);
    OPCODE(UMUL_LOHI);
    OPCODE(SDIVREM);
    OPCODE(UDIVREM);
    OPCODE(SADDO);
    OPCODE(SSUBO);
    OPCODE(UADDO);
    OPCODE(USUBO);
    OPCODE(ADDCARRY);
    OPCODE(SUBCARRY);
    OPCODE(GET_CCMASK);
    OPCODE(MVC);
    OPCODE(NC);
    OPCODE(OC);
    OPCODE(XC);
    OPCODE(CLC);
    OPCODE(MEMSET_MVC);
    OPCODE(STPCPY);
    OPCODE(STRCMP);
    OPCODE(SEARCH_STRING);
    OPCODE(IPM);
    OPCODE(TBEGIN);
    OPCODE(TBEGIN_NOFLOAT);
    OPCODE(TEND);
    OPCODE(BYTE_MASK);
    OPCODE(ROTATE_MASK);
    OPCODE(REPLICATE);
    OPCODE(JOIN_DWORDS);
    OPCODE(SPLAT);
    OPCODE(MERGE_HIGH);
    OPCODE(MERGE_LOW);
    OPCODE(SHL_DOUBLE);
    OPCODE(PERMUTE_DWORDS);
    OPCODE(PERMUTE);
    OPCODE(PACK);
    OPCODE(PACKS_CC);
    OPCODE(PACKLS_CC);
    OPCODE(UNPACK_HIGH);
    OPCODE(UNPACKL_HIGH);
    OPCODE(UNPACK_LOW);
    OPCODE(UNPACKL_LOW);
    OPCODE(VSHL_BY_SCALAR);
    OPCODE(VSRL_BY_SCALAR);
    OPCODE(VSRA_BY_SCALAR);
    OPCODE(VROTL_BY_SCALAR);
    OPCODE(VSUM);
    OPCODE(VACC);
    OPCODE(VSCBI);
    OPCODE(VAC);
    OPCODE(VSBI);
    OPCODE(VACCC);
    OPCODE(VSBCBI);
    OPCODE(VICMPE);
    OPCODE(VICMPH);
    OPCODE(VICMPHL);
    OPCODE(VICMPES);
    OPCODE(VICMPHS);
    OPCODE(VICMPHLS);
    OPCODE(VFCMPE);
    OPCODE(STRICT_VFCMPE);
    OPCODE(STRICT_VFCMPES);
    OPCODE(VFCMPH);
    OPCODE(STRICT_VFCMPH);
    OPCODE(STRICT_VFCMPHS);
    OPCODE(VFCMPHE);
    OPCODE(STRICT_VFCMPHE);
    OPCODE(STRICT_VFCMPHES);
    OPCODE(VFCMPES);
    OPCODE(VFCMPHS);
    OPCODE(VFCMPHES);
    OPCODE(VFTCI);
    OPCODE(VEXTEND);
    OPCODE(STRICT_VEXTEND);
    OPCODE(VROUND);
    OPCODE(STRICT_VROUND);
    OPCODE(VTM);
    OPCODE(SCMP128HI);
    OPCODE(UCMP128HI);
    OPCODE(VFAE_CC);
    OPCODE(VFAEZ_CC);
    OPCODE(VFEE_CC);
    OPCODE(VFEEZ_CC);
    OPCODE(VFENE_CC);
    OPCODE(VFENEZ_CC);
    OPCODE(VISTR_CC);
    OPCODE(VSTRC_CC);
    OPCODE(VSTRCZ_CC);
    OPCODE(VSTRS_CC);
    OPCODE(VSTRSZ_CC);
    OPCODE(TDC);
    OPCODE(ATOMIC_SWAPW);
    OPCODE(ATOMIC_LOADW_ADD);
    OPCODE(ATOMIC_LOADW_SUB);
    OPCODE(ATOMIC_LOADW_AND);
    OPCODE(ATOMIC_LOADW_OR);
    OPCODE(ATOMIC_LOADW_XOR);
    OPCODE(ATOMIC_LOADW_NAND);
    OPCODE(ATOMIC_LOADW_MIN);
    OPCODE(ATOMIC_LOADW_MAX);
    OPCODE(ATOMIC_LOADW_UMIN);
    OPCODE(ATOMIC_LOADW_UMAX);
    OPCODE(ATOMIC_CMP_SWAPW);
    OPCODE(ATOMIC_CMP_SWAP);
    OPCODE(ATOMIC_LOAD_128);
    OPCODE(ATOMIC_STORE_128);
    OPCODE(ATOMIC_CMP_SWAP_128);
    OPCODE(LRV);
    OPCODE(STRV);
    OPCODE(VLER);
    OPCODE(VSTER);
    OPCODE(STCKF);
    OPCODE(PREFETCH);
    OPCODE(ADA_ENTRY);
  }
  // Not a SystemZISD opcode.
  return nullptr;
#undef OPCODE
}
6480 | |
6481 | // Return true if VT is a vector whose elements are a whole number of bytes |
6482 | // in width. Also check for presence of vector support. |
6483 | bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { |
6484 | if (!Subtarget.hasVector()) |
6485 | return false; |
6486 | |
6487 | return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); |
6488 | } |
6489 | |
6490 | // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT |
6491 | // producing a result of type ResVT. Op is a possibly bitcast version |
6492 | // of the input vector and Index is the index (based on type VecVT) that |
6493 | // should be extracted. Return the new extraction if a simplification |
6494 | // was possible or if Force is true. |
6495 | SDValue SystemZTargetLowering::(const SDLoc &DL, EVT ResVT, |
6496 | EVT VecVT, SDValue Op, |
6497 | unsigned Index, |
6498 | DAGCombinerInfo &DCI, |
6499 | bool Force) const { |
6500 | SelectionDAG &DAG = DCI.DAG; |
6501 | |
6502 | // The number of bytes being extracted. |
6503 | unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); |
6504 | |
6505 | for (;;) { |
6506 | unsigned Opcode = Op.getOpcode(); |
6507 | if (Opcode == ISD::BITCAST) |
6508 | // Look through bitcasts. |
6509 | Op = Op.getOperand(i: 0); |
6510 | else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && |
6511 | canTreatAsByteVector(VT: Op.getValueType())) { |
6512 | // Get a VPERM-like permute mask and see whether the bytes covered |
6513 | // by the extracted element are a contiguous sequence from one |
6514 | // source operand. |
6515 | SmallVector<int, SystemZ::VectorBytes> Bytes; |
6516 | if (!getVPermMask(ShuffleOp: Op, Bytes)) |
6517 | break; |
6518 | int First; |
6519 | if (!getShuffleInput(Bytes, Start: Index * BytesPerElement, |
6520 | BytesPerElement, Base&: First)) |
6521 | break; |
6522 | if (First < 0) |
6523 | return DAG.getUNDEF(VT: ResVT); |
6524 | // Make sure the contiguous sequence starts at a multiple of the |
6525 | // original element size. |
6526 | unsigned Byte = unsigned(First) % Bytes.size(); |
6527 | if (Byte % BytesPerElement != 0) |
6528 | break; |
6529 | // We can get the extracted value directly from an input. |
6530 | Index = Byte / BytesPerElement; |
6531 | Op = Op.getOperand(i: unsigned(First) / Bytes.size()); |
6532 | Force = true; |
6533 | } else if (Opcode == ISD::BUILD_VECTOR && |
6534 | canTreatAsByteVector(VT: Op.getValueType())) { |
6535 | // We can only optimize this case if the BUILD_VECTOR elements are |
6536 | // at least as wide as the extracted value. |
6537 | EVT OpVT = Op.getValueType(); |
6538 | unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); |
6539 | if (OpBytesPerElement < BytesPerElement) |
6540 | break; |
6541 | // Make sure that the least-significant bit of the extracted value |
6542 | // is the least significant bit of an input. |
6543 | unsigned End = (Index + 1) * BytesPerElement; |
6544 | if (End % OpBytesPerElement != 0) |
6545 | break; |
6546 | // We're extracting the low part of one operand of the BUILD_VECTOR. |
6547 | Op = Op.getOperand(i: End / OpBytesPerElement - 1); |
6548 | if (!Op.getValueType().isInteger()) { |
6549 | EVT VT = MVT::getIntegerVT(BitWidth: Op.getValueSizeInBits()); |
6550 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op); |
6551 | DCI.AddToWorklist(N: Op.getNode()); |
6552 | } |
6553 | EVT VT = MVT::getIntegerVT(BitWidth: ResVT.getSizeInBits()); |
6554 | Op = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT, Operand: Op); |
6555 | if (VT != ResVT) { |
6556 | DCI.AddToWorklist(N: Op.getNode()); |
6557 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: ResVT, Operand: Op); |
6558 | } |
6559 | return Op; |
6560 | } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || |
6561 | Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || |
6562 | Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && |
6563 | canTreatAsByteVector(VT: Op.getValueType()) && |
6564 | canTreatAsByteVector(VT: Op.getOperand(i: 0).getValueType())) { |
6565 | // Make sure that only the unextended bits are significant. |
6566 | EVT ExtVT = Op.getValueType(); |
6567 | EVT OpVT = Op.getOperand(i: 0).getValueType(); |
6568 | unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); |
6569 | unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); |
6570 | unsigned Byte = Index * BytesPerElement; |
6571 | unsigned SubByte = Byte % ExtBytesPerElement; |
6572 | unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; |
6573 | if (SubByte < MinSubByte || |
6574 | SubByte + BytesPerElement > ExtBytesPerElement) |
6575 | break; |
6576 | // Get the byte offset of the unextended element |
6577 | Byte = Byte / ExtBytesPerElement * OpBytesPerElement; |
6578 | // ...then add the byte offset relative to that element. |
6579 | Byte += SubByte - MinSubByte; |
6580 | if (Byte % BytesPerElement != 0) |
6581 | break; |
6582 | Op = Op.getOperand(i: 0); |
6583 | Index = Byte / BytesPerElement; |
6584 | Force = true; |
6585 | } else |
6586 | break; |
6587 | } |
6588 | if (Force) { |
6589 | if (Op.getValueType() != VecVT) { |
6590 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL, VT: VecVT, Operand: Op); |
6591 | DCI.AddToWorklist(N: Op.getNode()); |
6592 | } |
6593 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, |
6594 | DAG.getConstant(Index, DL, MVT::i32)); |
6595 | } |
6596 | return SDValue(); |
6597 | } |
6598 | |
6599 | // Optimize vector operations in scalar value Op on the basis that Op |
6600 | // is truncated to TruncVT. |
6601 | SDValue SystemZTargetLowering::( |
6602 | const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { |
6603 | // If we have (trunc (extract_vector_elt X, Y)), try to turn it into |
6604 | // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements |
6605 | // of type TruncVT. |
6606 | if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
6607 | TruncVT.getSizeInBits() % 8 == 0) { |
6608 | SDValue Vec = Op.getOperand(i: 0); |
6609 | EVT VecVT = Vec.getValueType(); |
6610 | if (canTreatAsByteVector(VT: VecVT)) { |
6611 | if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: Op.getOperand(i: 1))) { |
6612 | unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); |
6613 | unsigned TruncBytes = TruncVT.getStoreSize(); |
6614 | if (BytesPerElement % TruncBytes == 0) { |
6615 | // Calculate the value of Y' in the above description. We are |
6616 | // splitting the original elements into Scale equal-sized pieces |
6617 | // and for truncation purposes want the last (least-significant) |
6618 | // of these pieces for IndexN. This is easiest to do by calculating |
6619 | // the start index of the following element and then subtracting 1. |
6620 | unsigned Scale = BytesPerElement / TruncBytes; |
6621 | unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; |
6622 | |
6623 | // Defer the creation of the bitcast from X to combineExtract, |
6624 | // which might be able to optimize the extraction. |
6625 | VecVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: TruncBytes * 8), |
6626 | NumElements: VecVT.getStoreSize() / TruncBytes); |
6627 | EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); |
6628 | return combineExtract(DL, ResVT, VecVT, Op: Vec, Index: NewIndex, DCI, Force: true); |
6629 | } |
6630 | } |
6631 | } |
6632 | } |
6633 | return SDValue(); |
6634 | } |
6635 | |
6636 | SDValue SystemZTargetLowering::combineZERO_EXTEND( |
6637 | SDNode *N, DAGCombinerInfo &DCI) const { |
6638 | // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') |
6639 | SelectionDAG &DAG = DCI.DAG; |
6640 | SDValue N0 = N->getOperand(Num: 0); |
6641 | EVT VT = N->getValueType(ResNo: 0); |
6642 | if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { |
6643 | auto *TrueOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 0)); |
6644 | auto *FalseOp = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
6645 | if (TrueOp && FalseOp) { |
6646 | SDLoc DL(N0); |
6647 | SDValue Ops[] = { DAG.getConstant(Val: TrueOp->getZExtValue(), DL, VT), |
6648 | DAG.getConstant(Val: FalseOp->getZExtValue(), DL, VT), |
6649 | N0.getOperand(i: 2), N0.getOperand(i: 3), N0.getOperand(i: 4) }; |
6650 | SDValue NewSelect = DAG.getNode(Opcode: SystemZISD::SELECT_CCMASK, DL, VT, Ops); |
6651 | // If N0 has multiple uses, change other uses as well. |
6652 | if (!N0.hasOneUse()) { |
6653 | SDValue TruncSelect = |
6654 | DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: N0.getValueType(), Operand: NewSelect); |
6655 | DCI.CombineTo(N: N0.getNode(), Res: TruncSelect); |
6656 | } |
6657 | return NewSelect; |
6658 | } |
6659 | } |
6660 | // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size |
6661 | // of the result is smaller than the size of X and all the truncated bits |
6662 | // of X are already zero. |
6663 | if (N0.getOpcode() == ISD::XOR && |
6664 | N0.hasOneUse() && N0.getOperand(i: 0).hasOneUse() && |
6665 | N0.getOperand(i: 0).getOpcode() == ISD::TRUNCATE && |
6666 | N0.getOperand(i: 1).getOpcode() == ISD::Constant) { |
6667 | SDValue X = N0.getOperand(i: 0).getOperand(i: 0); |
6668 | if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { |
6669 | KnownBits Known = DAG.computeKnownBits(Op: X); |
6670 | APInt TruncatedBits = APInt::getBitsSet(numBits: X.getValueSizeInBits(), |
6671 | loBit: N0.getValueSizeInBits(), |
6672 | hiBit: VT.getSizeInBits()); |
6673 | if (TruncatedBits.isSubsetOf(RHS: Known.Zero)) { |
6674 | X = DAG.getNode(Opcode: ISD::TRUNCATE, DL: SDLoc(X), VT, Operand: X); |
6675 | APInt Mask = N0.getConstantOperandAPInt(i: 1).zext(width: VT.getSizeInBits()); |
6676 | return DAG.getNode(Opcode: ISD::XOR, DL: SDLoc(N0), VT, |
6677 | N1: X, N2: DAG.getConstant(Val: Mask, DL: SDLoc(N0), VT)); |
6678 | } |
6679 | } |
6680 | } |
6681 | |
6682 | return SDValue(); |
6683 | } |
6684 | |
6685 | SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( |
6686 | SDNode *N, DAGCombinerInfo &DCI) const { |
6687 | // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) |
6688 | // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) |
6689 | // into (select_cc LHS, RHS, -1, 0, COND) |
6690 | SelectionDAG &DAG = DCI.DAG; |
6691 | SDValue N0 = N->getOperand(Num: 0); |
6692 | EVT VT = N->getValueType(ResNo: 0); |
6693 | EVT EVT = cast<VTSDNode>(Val: N->getOperand(Num: 1))->getVT(); |
6694 | if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) |
6695 | N0 = N0.getOperand(i: 0); |
6696 | if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { |
6697 | SDLoc DL(N0); |
6698 | SDValue Ops[] = { N0.getOperand(i: 0), N0.getOperand(i: 1), |
6699 | DAG.getConstant(Val: -1, DL, VT), DAG.getConstant(Val: 0, DL, VT), |
6700 | N0.getOperand(i: 2) }; |
6701 | return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT, Ops); |
6702 | } |
6703 | return SDValue(); |
6704 | } |
6705 | |
6706 | SDValue SystemZTargetLowering::combineSIGN_EXTEND( |
6707 | SDNode *N, DAGCombinerInfo &DCI) const { |
6708 | // Convert (sext (ashr (shl X, C1), C2)) to |
6709 | // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as |
6710 | // cheap as narrower ones. |
6711 | SelectionDAG &DAG = DCI.DAG; |
6712 | SDValue N0 = N->getOperand(Num: 0); |
6713 | EVT VT = N->getValueType(ResNo: 0); |
6714 | if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { |
6715 | auto *SraAmt = dyn_cast<ConstantSDNode>(Val: N0.getOperand(i: 1)); |
6716 | SDValue Inner = N0.getOperand(i: 0); |
6717 | if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { |
6718 | if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Val: Inner.getOperand(i: 1))) { |
6719 | unsigned = (VT.getSizeInBits() - N0.getValueSizeInBits()); |
6720 | unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; |
6721 | unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; |
6722 | EVT ShiftVT = N0.getOperand(i: 1).getValueType(); |
6723 | SDValue Ext = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: SDLoc(Inner), VT, |
6724 | Operand: Inner.getOperand(i: 0)); |
6725 | SDValue Shl = DAG.getNode(Opcode: ISD::SHL, DL: SDLoc(Inner), VT, N1: Ext, |
6726 | N2: DAG.getConstant(Val: NewShlAmt, DL: SDLoc(Inner), |
6727 | VT: ShiftVT)); |
6728 | return DAG.getNode(Opcode: ISD::SRA, DL: SDLoc(N0), VT, N1: Shl, |
6729 | N2: DAG.getConstant(Val: NewSraAmt, DL: SDLoc(N0), VT: ShiftVT)); |
6730 | } |
6731 | } |
6732 | } |
6733 | |
6734 | return SDValue(); |
6735 | } |
6736 | |
// Custom DAG combine for SystemZISD::MERGE_HIGH / MERGE_LOW nodes.
SDValue SystemZTargetLowering::combineMERGE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  unsigned Opcode = N->getOpcode();
  SDValue Op0 = N->getOperand(Num: 0);
  SDValue Op1 = N->getOperand(Num: 1);
  // Look through a bitcast on the first operand.
  if (Op0.getOpcode() == ISD::BITCAST)
    Op0 = Op0.getOperand(i: 0);
  if (ISD::isBuildVectorAllZeros(N: Op0.getNode())) {
    // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
    // for v4f32.
    // Note that Op1 is compared against the *original* first operand
    // (before the bitcast was stripped), so this only fires when both
    // operands are literally the same all-zeros node.
    if (Op1 == N->getOperand(Num: 0))
      return Op1;
    // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
    EVT VT = Op1.getValueType();
    unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
    if (ElemBytes <= 4) {
      // Pick the unpack direction matching the merge direction.
      Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
      // Unpacking doubles the element width (ElemBytes * 16 bits) and
      // halves the element count.
      EVT InVT = VT.changeVectorElementTypeToInteger();
      EVT OutVT = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemBytes * 16),
                                   NumElements: SystemZ::VectorBytes / ElemBytes / 2);
      // Bitcast Op1 to an integer element type first if necessary.
      if (VT != InVT) {
        Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: InVT, Operand: Op1);
        DCI.AddToWorklist(N: Op1.getNode());
      }
      SDValue Op = DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: Op1);
      DCI.AddToWorklist(N: Op.getNode());
      // Cast the unpack result back to the original merge type.
      return DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT, Operand: Op);
    }
  }
  return SDValue();
}
6770 | |
6771 | SDValue SystemZTargetLowering::combineLOAD( |
6772 | SDNode *N, DAGCombinerInfo &DCI) const { |
6773 | SelectionDAG &DAG = DCI.DAG; |
6774 | EVT LdVT = N->getValueType(ResNo: 0); |
6775 | SDLoc DL(N); |
6776 | |
6777 | // Replace an i128 load that is used solely to move its value into GPRs |
6778 | // by separate loads of both halves. |
6779 | if (LdVT == MVT::i128) { |
6780 | LoadSDNode *LD = cast<LoadSDNode>(Val: N); |
6781 | if (!LD->isSimple() || !ISD::isNormalLoad(N: LD)) |
6782 | return SDValue(); |
6783 | |
6784 | // Scan through all users. |
6785 | SmallVector<std::pair<SDNode *, int>, 2> Users; |
6786 | int UsedElements = 0; |
6787 | for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); |
6788 | UI != UIEnd; ++UI) { |
6789 | // Skip the uses of the chain. |
6790 | if (UI.getUse().getResNo() != 0) |
6791 | continue; |
6792 | |
6793 | // Verify every user is a TRUNCATE to i64 of the low or high half ... |
6794 | SDNode *User = *UI; |
6795 | int Index = 1; |
6796 | if (User->getOpcode() == ISD::SRL && |
6797 | User->getOperand(Num: 1).getOpcode() == ISD::Constant && |
6798 | User->getConstantOperandVal(Num: 1) == 64 && User->hasOneUse()) { |
6799 | User = *User->use_begin(); |
6800 | Index = 0; |
6801 | } |
6802 | if (User->getOpcode() != ISD::TRUNCATE || |
6803 | User->getValueType(0) != MVT::i64) |
6804 | return SDValue(); |
6805 | |
6806 | // ... and no half is extracted twice. |
6807 | if (UsedElements & (1 << Index)) |
6808 | return SDValue(); |
6809 | |
6810 | UsedElements |= 1 << Index; |
6811 | Users.push_back(Elt: std::make_pair(x&: User, y&: Index)); |
6812 | } |
6813 | |
6814 | // Rewrite each extraction as an independent load. |
6815 | SmallVector<SDValue, 2> ArgChains; |
6816 | for (auto UserAndIndex : Users) { |
6817 | SDNode *User = UserAndIndex.first; |
6818 | unsigned Offset = User->getValueType(ResNo: 0).getStoreSize() * UserAndIndex.second; |
6819 | SDValue Ptr = |
6820 | DAG.getMemBasePlusOffset(Base: LD->getBasePtr(), Offset: TypeSize::getFixed(ExactSize: Offset), DL); |
6821 | SDValue EltLoad = |
6822 | DAG.getLoad(VT: User->getValueType(ResNo: 0), dl: DL, Chain: LD->getChain(), Ptr, |
6823 | PtrInfo: LD->getPointerInfo().getWithOffset(O: Offset), |
6824 | Alignment: LD->getOriginalAlign(), MMOFlags: LD->getMemOperand()->getFlags(), |
6825 | AAInfo: LD->getAAInfo()); |
6826 | |
6827 | DCI.CombineTo(N: User, Res: EltLoad, AddTo: true); |
6828 | ArgChains.push_back(Elt: EltLoad.getValue(R: 1)); |
6829 | } |
6830 | |
6831 | // Collect all chains via TokenFactor. |
6832 | SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, |
6833 | ArgChains); |
6834 | DAG.ReplaceAllUsesOfValueWith(From: SDValue(N, 1), To: Chain); |
6835 | DCI.AddToWorklist(N: Chain.getNode()); |
6836 | return SDValue(N, 0); |
6837 | } |
6838 | |
6839 | if (LdVT.isVector() || LdVT.isInteger()) |
6840 | return SDValue(); |
6841 | // Transform a scalar load that is REPLICATEd as well as having other |
6842 | // use(s) to the form where the other use(s) use the first element of the |
6843 | // REPLICATE instead of the load. Otherwise instruction selection will not |
6844 | // produce a VLREP. Avoid extracting to a GPR, so only do this for floating |
6845 | // point loads. |
6846 | |
6847 | SDValue Replicate; |
6848 | SmallVector<SDNode*, 8> OtherUses; |
6849 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); |
6850 | UI != UE; ++UI) { |
6851 | if (UI->getOpcode() == SystemZISD::REPLICATE) { |
6852 | if (Replicate) |
6853 | return SDValue(); // Should never happen |
6854 | Replicate = SDValue(*UI, 0); |
6855 | } |
6856 | else if (UI.getUse().getResNo() == 0) |
6857 | OtherUses.push_back(Elt: *UI); |
6858 | } |
6859 | if (!Replicate || OtherUses.empty()) |
6860 | return SDValue(); |
6861 | |
6862 | SDValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, |
6863 | Replicate, DAG.getConstant(0, DL, MVT::i32)); |
6864 | // Update uses of the loaded Value while preserving old chains. |
6865 | for (SDNode *U : OtherUses) { |
6866 | SmallVector<SDValue, 8> Ops; |
6867 | for (SDValue Op : U->ops()) |
6868 | Ops.push_back(Elt: (Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op); |
6869 | DAG.UpdateNodeOperands(N: U, Ops); |
6870 | } |
6871 | return SDValue(N, 0); |
6872 | } |
6873 | |
6874 | bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { |
6875 | if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) |
6876 | return true; |
6877 | if (Subtarget.hasVectorEnhancements2()) |
6878 | if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) |
6879 | return true; |
6880 | return false; |
6881 | } |
6882 | |
6883 | static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { |
6884 | if (!VT.isVector() || !VT.isSimple() || |
6885 | VT.getSizeInBits() != 128 || |
6886 | VT.getScalarSizeInBits() % 8 != 0) |
6887 | return false; |
6888 | |
6889 | unsigned NumElts = VT.getVectorNumElements(); |
6890 | for (unsigned i = 0; i < NumElts; ++i) { |
6891 | if (M[i] < 0) continue; // ignore UNDEF indices |
6892 | if ((unsigned) M[i] != NumElts - 1 - i) |
6893 | return false; |
6894 | } |
6895 | |
6896 | return true; |
6897 | } |
6898 | |
6899 | static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { |
6900 | for (auto *U : StoredVal->uses()) { |
6901 | if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Val: U)) { |
6902 | EVT CurrMemVT = ST->getMemoryVT().getScalarType(); |
6903 | if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) |
6904 | continue; |
6905 | } else if (isa<BuildVectorSDNode>(Val: U)) { |
6906 | SDValue BuildVector = SDValue(U, 0); |
6907 | if (DAG.isSplatValue(V: BuildVector, AllowUndefs: true/*AllowUndefs*/) && |
6908 | isOnlyUsedByStores(StoredVal: BuildVector, DAG)) |
6909 | continue; |
6910 | } |
6911 | return false; |
6912 | } |
6913 | return true; |
6914 | } |
6915 | |
6916 | static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) { |
6917 | if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) |
6918 | return false; |
6919 | |
6920 | SDValue Op0 = Val.getOperand(i: 0); |
6921 | SDValue Op1 = Val.getOperand(i: 1); |
6922 | |
6923 | if (Op0.getOpcode() == ISD::SHL) |
6924 | std::swap(a&: Op0, b&: Op1); |
6925 | if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || |
6926 | Op1.getOperand(i: 1).getOpcode() != ISD::Constant || |
6927 | Op1.getConstantOperandVal(i: 1) != 64) |
6928 | return false; |
6929 | Op1 = Op1.getOperand(i: 0); |
6930 | |
6931 | if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || |
6932 | Op0.getOperand(0).getValueType() != MVT::i64) |
6933 | return false; |
6934 | if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || |
6935 | Op1.getOperand(0).getValueType() != MVT::i64) |
6936 | return false; |
6937 | |
6938 | LoPart = Op0.getOperand(i: 0); |
6939 | HiPart = Op1.getOperand(i: 0); |
6940 | return true; |
6941 | } |
6942 | |
// Custom DAG combine for STORE.  Tries, in order: moving truncating stores
// of vector extractions onto a byte-compatible vector type, byte-swapped
// stores (STRV), element-swapped vector stores (VSTER), READCYCLECOUNTER
// stores (STCKF), splitting i128 stores built from two GPR halves, and
// replacing stores of replicated values with a vector splat store.
SDValue SystemZTargetLowering::combineSTORE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  auto *SN = cast<StoreSDNode>(Val: N);
  auto &Op1 = N->getOperand(Num: 1);
  EVT MemVT = SN->getMemoryVT();
  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
  // for the extraction to be done on a vMiN value, so that we can use VSTE.
  // If X has wider elements then convert it to:
  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
  if (MemVT.isInteger() && SN->isTruncatingStore()) {
    if (SDValue Value =
            combineTruncateExtract(DL: SDLoc(N), TruncVT: MemVT, Op: SN->getValue(), DCI)) {
      DCI.AddToWorklist(N: Value.getNode());

      // Rewrite the store with the new form of stored value.
      return DAG.getTruncStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: Value,
                               Ptr: SN->getBasePtr(), SVT: SN->getMemoryVT(),
                               MMO: SN->getMemOperand());
    }
  }
  // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::BSWAP &&
      Op1.getNode()->hasOneUse() &&
      canLoadStoreByteSwapped(VT: Op1.getValueType())) {

    SDValue BSwapOp = Op1.getOperand(i: 0);

    // i16 values are widened to i32 before forming the byte-swapping store.
    if (BSwapOp.getValueType() == MVT::i16)
      BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);

    SDValue Ops[] = {
      N->getOperand(Num: 0), BSwapOp, N->getOperand(Num: 2)
    };

    return
      DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
                              Ops, MemVT, SN->getMemOperand());
  }
  // Combine STORE (element-swap) into VSTER
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
      Op1.getNode()->hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: Op1.getNode());
    ArrayRef<int> ShuffleMask = SVN->getMask();
    // Only element-reversal shuffles can be folded into the store.
    if (isVectorElementSwap(M: ShuffleMask, VT: Op1.getValueType())) {
      SDValue Ops[] = {
        N->getOperand(Num: 0), Op1.getOperand(i: 0), N->getOperand(Num: 2)
      };

      return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
                                     DAG.getVTList(MVT::Other),
                                     Ops, MemVT, SN->getMemOperand());
    }
  }

  // Combine STORE (READCYCLECOUNTER) into STCKF.
  // The store chain must reach the counter's chain without intervening
  // side effects for the fold to be safe.
  if (!SN->isTruncatingStore() &&
      Op1.getOpcode() == ISD::READCYCLECOUNTER &&
      Op1.hasOneUse() &&
      N->getOperand(Num: 0).reachesChainWithoutSideEffects(Dest: SDValue(Op1.getNode(), 1))) {
    SDValue Ops[] = { Op1.getOperand(i: 0), N->getOperand(Num: 2) };
    return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
                                   DAG.getVTList(MVT::Other),
                                   Ops, MemVT, SN->getMemOperand());
  }

  // Transform a store of an i128 moved from GPRs into two separate stores.
  // The high part goes at the base address and the low part 8 bytes above it.
  if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
    SDValue LoPart, HiPart;
    if (isMovedFromParts(Val: Op1, LoPart, HiPart)) {
      SDLoc DL(SN);
      SDValue Chain0 =
        DAG.getStore(Chain: SN->getChain(), dl: DL, Val: HiPart, Ptr: SN->getBasePtr(),
                     PtrInfo: SN->getPointerInfo(), Alignment: SN->getOriginalAlign(),
                     MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());
      SDValue Chain1 =
        DAG.getStore(Chain: SN->getChain(), dl: DL, Val: LoPart,
                     Ptr: DAG.getObjectPtrOffset(SL: DL, Ptr: SN->getBasePtr(),
                                                Offset: TypeSize::getFixed(ExactSize: 8)),
                     PtrInfo: SN->getPointerInfo().getWithOffset(O: 8),
                     Alignment: SN->getOriginalAlign(),
                     MMOFlags: SN->getMemOperand()->getFlags(), AAInfo: SN->getAAInfo());

      return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
    }
  }

  // Replicate a reg or immediate with VREP instead of scalar multiply or
  // immediate load. It seems best to do this during the first DAGCombine as
  // it is straight-forward to handle the zero-extend node in the initial
  // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
  // extracting an i16 element from a v16i8 vector).
  if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
      isOnlyUsedByStores(StoredVal: Op1, DAG)) {
    SDValue Word = SDValue();
    EVT WordVT;

    // Find a replicated immediate and return it if found in Word and its
    // type in WordVT.
    auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
      // Some constants are better handled with a scalar store.
      if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
          isInt<16>(x: C->getSExtValue()) || MemVT.getStoreSize() <= 2)
        return;
      SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
      if (VCI.isVectorConstantLegal(Subtarget) &&
          VCI.Opcode == SystemZISD::REPLICATE) {
        Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
        WordVT = VCI.VecVT.getScalarType();
      }
    };

    // Find a replicated register and return it if found in Word and its type
    // in WordVT.
    auto FindReplicatedReg = [&](SDValue MulOp) {
      EVT MulVT = MulOp.getValueType();
      if (MulOp->getOpcode() == ISD::MUL &&
          (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
        // Find a zero extended value and its type.
        SDValue LHS = MulOp->getOperand(Num: 0);
        if (LHS->getOpcode() == ISD::ZERO_EXTEND)
          WordVT = LHS->getOperand(Num: 0).getValueType();
        else if (LHS->getOpcode() == ISD::AssertZext)
          WordVT = cast<VTSDNode>(Val: LHS->getOperand(Num: 1))->getVT();
        else
          return;
        // Find a replicating constant, e.g. 0x00010001.
        if (auto *C = dyn_cast<ConstantSDNode>(Val: MulOp->getOperand(Num: 1))) {
          SystemZVectorConstantInfo VCI(
              APInt(MulVT.getSizeInBits(), C->getZExtValue()));
          if (VCI.isVectorConstantLegal(Subtarget) &&
              VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
              WordVT == VCI.VecVT.getScalarType())
            Word = DAG.getZExtOrTrunc(Op: LHS->getOperand(Num: 0), DL: SDLoc(SN), VT: WordVT);
        }
      }
    };

    // Splat BUILD_VECTORs are inspected through their first element;
    // scalar stored values are inspected directly.
    if (isa<BuildVectorSDNode>(Val: Op1) &&
        DAG.isSplatValue(V: Op1, AllowUndefs: true/*AllowUndefs*/)) {
      SDValue SplatVal = Op1->getOperand(Num: 0);
      if (auto *C = dyn_cast<ConstantSDNode>(Val&: SplatVal))
        FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
      else
        FindReplicatedReg(SplatVal);
    } else {
      if (auto *C = dyn_cast<ConstantSDNode>(Val: Op1))
        FindReplicatedImm(C, MemVT.getStoreSize());
      else
        FindReplicatedReg(Op1);
    }

    // If a replicated word was found, store the corresponding splat vector.
    if (Word != SDValue()) {
      assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
             "Bad type handling" );
      unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
      EVT SplatVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: WordVT, NumElements: NumElts);
      SDValue SplatVal = DAG.getSplatVector(VT: SplatVT, DL: SDLoc(SN), Op: Word);
      return DAG.getStore(Chain: SN->getChain(), dl: SDLoc(SN), Val: SplatVal,
                          Ptr: SN->getBasePtr(), MMO: SN->getMemOperand());
    }
  }

  return SDValue();
}
7111 | |
// Custom DAG combine for VECTOR_SHUFFLE: folds an element-reversing
// shuffle of a single-use normal load into a SystemZISD::VLER node
// (requires vector-enhancements-2).
SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine element-swap (LOAD) into VLER
  if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) &&
      N->getOperand(Num: 0).hasOneUse() &&
      Subtarget.hasVectorEnhancements2()) {
    ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Val: N);
    ArrayRef<int> ShuffleMask = SVN->getMask();
    // Only a full element reversal can be folded into the load.
    if (isVectorElementSwap(M: ShuffleMask, VT: N->getValueType(ResNo: 0))) {
      SDValue Load = N->getOperand(Num: 0);
      LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);

      // Create the element-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr()   // Ptr
      };
      SDValue ESLoad =
        DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
                                DAG.getVTList(LD->getValueType(0), MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // First, combine the VECTOR_SHUFFLE away.  This makes the value produced
      // by the load dead.
      DCI.CombineTo(N, Res: ESLoad);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the shuffle is dead.
      DCI.CombineTo(N: Load.getNode(), Res0: ESLoad, Res1: ESLoad.getValue(R: 1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
7150 | |
7151 | SDValue SystemZTargetLowering::( |
7152 | SDNode *N, DAGCombinerInfo &DCI) const { |
7153 | SelectionDAG &DAG = DCI.DAG; |
7154 | |
7155 | if (!Subtarget.hasVector()) |
7156 | return SDValue(); |
7157 | |
7158 | // Look through bitcasts that retain the number of vector elements. |
7159 | SDValue Op = N->getOperand(Num: 0); |
7160 | if (Op.getOpcode() == ISD::BITCAST && |
7161 | Op.getValueType().isVector() && |
7162 | Op.getOperand(i: 0).getValueType().isVector() && |
7163 | Op.getValueType().getVectorNumElements() == |
7164 | Op.getOperand(i: 0).getValueType().getVectorNumElements()) |
7165 | Op = Op.getOperand(i: 0); |
7166 | |
7167 | // Pull BSWAP out of a vector extraction. |
7168 | if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { |
7169 | EVT VecVT = Op.getValueType(); |
7170 | EVT EltVT = VecVT.getVectorElementType(); |
7171 | Op = DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: SDLoc(N), VT: EltVT, |
7172 | N1: Op.getOperand(i: 0), N2: N->getOperand(Num: 1)); |
7173 | DCI.AddToWorklist(N: Op.getNode()); |
7174 | Op = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Op); |
7175 | if (EltVT != N->getValueType(ResNo: 0)) { |
7176 | DCI.AddToWorklist(N: Op.getNode()); |
7177 | Op = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), Operand: Op); |
7178 | } |
7179 | return Op; |
7180 | } |
7181 | |
7182 | // Try to simplify a vector extraction. |
7183 | if (auto *IndexN = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1))) { |
7184 | SDValue Op0 = N->getOperand(Num: 0); |
7185 | EVT VecVT = Op0.getValueType(); |
7186 | return combineExtract(DL: SDLoc(N), ResVT: N->getValueType(ResNo: 0), VecVT, Op: Op0, |
7187 | Index: IndexN->getZExtValue(), DCI, Force: false); |
7188 | } |
7189 | return SDValue(); |
7190 | } |
7191 | |
7192 | SDValue SystemZTargetLowering::combineJOIN_DWORDS( |
7193 | SDNode *N, DAGCombinerInfo &DCI) const { |
7194 | SelectionDAG &DAG = DCI.DAG; |
7195 | // (join_dwords X, X) == (replicate X) |
7196 | if (N->getOperand(Num: 0) == N->getOperand(Num: 1)) |
7197 | return DAG.getNode(Opcode: SystemZISD::REPLICATE, DL: SDLoc(N), VT: N->getValueType(ResNo: 0), |
7198 | Operand: N->getOperand(Num: 0)); |
7199 | return SDValue(); |
7200 | } |
7201 | |
7202 | static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { |
7203 | SDValue Chain1 = N1->getOperand(Num: 0); |
7204 | SDValue Chain2 = N2->getOperand(Num: 0); |
7205 | |
7206 | // Trivial case: both nodes take the same chain. |
7207 | if (Chain1 == Chain2) |
7208 | return Chain1; |
7209 | |
7210 | // FIXME - we could handle more complex cases via TokenFactor, |
7211 | // assuming we can verify that this would not create a cycle. |
7212 | return SDValue(); |
7213 | } |
7214 | |
7215 | SDValue SystemZTargetLowering::combineFP_ROUND( |
7216 | SDNode *N, DAGCombinerInfo &DCI) const { |
7217 | |
7218 | if (!Subtarget.hasVector()) |
7219 | return SDValue(); |
7220 | |
7221 | // (fpround (extract_vector_elt X 0)) |
7222 | // (fpround (extract_vector_elt X 1)) -> |
7223 | // (extract_vector_elt (VROUND X) 0) |
7224 | // (extract_vector_elt (VROUND X) 2) |
7225 | // |
7226 | // This is a special case since the target doesn't really support v2f32s. |
7227 | unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; |
7228 | SelectionDAG &DAG = DCI.DAG; |
7229 | SDValue Op0 = N->getOperand(Num: OpNo); |
7230 | if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && |
7231 | Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
7232 | Op0.getOperand(0).getValueType() == MVT::v2f64 && |
7233 | Op0.getOperand(1).getOpcode() == ISD::Constant && |
7234 | Op0.getConstantOperandVal(1) == 0) { |
7235 | SDValue Vec = Op0.getOperand(i: 0); |
7236 | for (auto *U : Vec->uses()) { |
7237 | if (U != Op0.getNode() && U->hasOneUse() && |
7238 | U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
7239 | U->getOperand(Num: 0) == Vec && |
7240 | U->getOperand(Num: 1).getOpcode() == ISD::Constant && |
7241 | U->getConstantOperandVal(Num: 1) == 1) { |
7242 | SDValue OtherRound = SDValue(*U->use_begin(), 0); |
7243 | if (OtherRound.getOpcode() == N->getOpcode() && |
7244 | OtherRound.getOperand(OpNo) == SDValue(U, 0) && |
7245 | OtherRound.getValueType() == MVT::f32) { |
7246 | SDValue VRound, Chain; |
7247 | if (N->isStrictFPOpcode()) { |
7248 | Chain = MergeInputChains(N1: N, N2: OtherRound.getNode()); |
7249 | if (!Chain) |
7250 | continue; |
7251 | VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), |
7252 | {MVT::v4f32, MVT::Other}, {Chain, Vec}); |
7253 | Chain = VRound.getValue(R: 1); |
7254 | } else |
7255 | VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), |
7256 | MVT::v4f32, Vec); |
7257 | DCI.AddToWorklist(N: VRound.getNode()); |
7258 | SDValue = |
7259 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, |
7260 | VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); |
7261 | DCI.AddToWorklist(N: Extract1.getNode()); |
7262 | DAG.ReplaceAllUsesOfValueWith(From: OtherRound, To: Extract1); |
7263 | if (Chain) |
7264 | DAG.ReplaceAllUsesOfValueWith(From: OtherRound.getValue(R: 1), To: Chain); |
7265 | SDValue = |
7266 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, |
7267 | VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); |
7268 | if (Chain) |
7269 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0), |
7270 | VTList: N->getVTList(), N1: Extract0, N2: Chain); |
7271 | return Extract0; |
7272 | } |
7273 | } |
7274 | } |
7275 | } |
7276 | return SDValue(); |
7277 | } |
7278 | |
7279 | SDValue SystemZTargetLowering::combineFP_EXTEND( |
7280 | SDNode *N, DAGCombinerInfo &DCI) const { |
7281 | |
7282 | if (!Subtarget.hasVector()) |
7283 | return SDValue(); |
7284 | |
7285 | // (fpextend (extract_vector_elt X 0)) |
7286 | // (fpextend (extract_vector_elt X 2)) -> |
7287 | // (extract_vector_elt (VEXTEND X) 0) |
7288 | // (extract_vector_elt (VEXTEND X) 1) |
7289 | // |
7290 | // This is a special case since the target doesn't really support v2f32s. |
7291 | unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0; |
7292 | SelectionDAG &DAG = DCI.DAG; |
7293 | SDValue Op0 = N->getOperand(Num: OpNo); |
7294 | if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && |
7295 | Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
7296 | Op0.getOperand(0).getValueType() == MVT::v4f32 && |
7297 | Op0.getOperand(1).getOpcode() == ISD::Constant && |
7298 | Op0.getConstantOperandVal(1) == 0) { |
7299 | SDValue Vec = Op0.getOperand(i: 0); |
7300 | for (auto *U : Vec->uses()) { |
7301 | if (U != Op0.getNode() && U->hasOneUse() && |
7302 | U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
7303 | U->getOperand(Num: 0) == Vec && |
7304 | U->getOperand(Num: 1).getOpcode() == ISD::Constant && |
7305 | U->getConstantOperandVal(Num: 1) == 2) { |
7306 | SDValue OtherExtend = SDValue(*U->use_begin(), 0); |
7307 | if (OtherExtend.getOpcode() == N->getOpcode() && |
7308 | OtherExtend.getOperand(OpNo) == SDValue(U, 0) && |
7309 | OtherExtend.getValueType() == MVT::f64) { |
7310 | SDValue VExtend, Chain; |
7311 | if (N->isStrictFPOpcode()) { |
7312 | Chain = MergeInputChains(N1: N, N2: OtherExtend.getNode()); |
7313 | if (!Chain) |
7314 | continue; |
7315 | VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), |
7316 | {MVT::v2f64, MVT::Other}, {Chain, Vec}); |
7317 | Chain = VExtend.getValue(R: 1); |
7318 | } else |
7319 | VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), |
7320 | MVT::v2f64, Vec); |
7321 | DCI.AddToWorklist(N: VExtend.getNode()); |
7322 | SDValue = |
7323 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, |
7324 | VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); |
7325 | DCI.AddToWorklist(N: Extract1.getNode()); |
7326 | DAG.ReplaceAllUsesOfValueWith(From: OtherExtend, To: Extract1); |
7327 | if (Chain) |
7328 | DAG.ReplaceAllUsesOfValueWith(From: OtherExtend.getValue(R: 1), To: Chain); |
7329 | SDValue = |
7330 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, |
7331 | VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); |
7332 | if (Chain) |
7333 | return DAG.getNode(Opcode: ISD::MERGE_VALUES, DL: SDLoc(Op0), |
7334 | VTList: N->getVTList(), N1: Extract0, N2: Chain); |
7335 | return Extract0; |
7336 | } |
7337 | } |
7338 | } |
7339 | } |
7340 | return SDValue(); |
7341 | } |
7342 | |
7343 | SDValue SystemZTargetLowering::combineINT_TO_FP( |
7344 | SDNode *N, DAGCombinerInfo &DCI) const { |
7345 | if (DCI.Level != BeforeLegalizeTypes) |
7346 | return SDValue(); |
7347 | SelectionDAG &DAG = DCI.DAG; |
7348 | LLVMContext &Ctx = *DAG.getContext(); |
7349 | unsigned Opcode = N->getOpcode(); |
7350 | EVT OutVT = N->getValueType(ResNo: 0); |
7351 | Type *OutLLVMTy = OutVT.getTypeForEVT(Context&: Ctx); |
7352 | SDValue Op = N->getOperand(Num: 0); |
7353 | unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); |
7354 | unsigned InScalarBits = Op->getValueType(ResNo: 0).getScalarSizeInBits(); |
7355 | |
7356 | // Insert an extension before type-legalization to avoid scalarization, e.g.: |
7357 | // v2f64 = uint_to_fp v2i16 |
7358 | // => |
7359 | // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) |
7360 | if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && |
7361 | OutScalarBits <= 64) { |
7362 | unsigned NumElts = cast<FixedVectorType>(Val: OutLLVMTy)->getNumElements(); |
7363 | EVT ExtVT = EVT::getVectorVT( |
7364 | Context&: Ctx, VT: EVT::getIntegerVT(Context&: Ctx, BitWidth: OutLLVMTy->getScalarSizeInBits()), NumElements: NumElts); |
7365 | unsigned ExtOpcode = |
7366 | (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); |
7367 | SDValue ExtOp = DAG.getNode(Opcode: ExtOpcode, DL: SDLoc(N), VT: ExtVT, Operand: Op); |
7368 | return DAG.getNode(Opcode, DL: SDLoc(N), VT: OutVT, Operand: ExtOp); |
7369 | } |
7370 | return SDValue(); |
7371 | } |
7372 | |
// Combine a BSWAP node: fold byte-swapped loads into the byte-reversing
// load instructions, and push BSWAP into vector insertions/shuffles when
// at least one side then simplifies.
SDValue SystemZTargetLowering::combineBSWAP(
    SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
  if (ISD::isNON_EXTLoad(N: N->getOperand(Num: 0).getNode()) &&
      N->getOperand(Num: 0).hasOneUse() &&
      canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0))) {
    SDValue Load = N->getOperand(Num: 0);
    LoadSDNode *LD = cast<LoadSDNode>(Val&: Load);

    // Create the byte-swapping load.
    SDValue Ops[] = {
      LD->getChain(),    // Chain
      LD->getBasePtr()   // Ptr
    };
    // An i16 byte-swapped load is performed with an i32-typed LRV node
    // and truncated below.
    EVT LoadVT = N->getValueType(ResNo: 0);
    if (LoadVT == MVT::i16)
      LoadVT = MVT::i32;
    SDValue BSLoad =
      DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
                              DAG.getVTList(LoadVT, MVT::Other),
                              Ops, LD->getMemoryVT(), LD->getMemOperand());

    // If this is an i16 load, insert the truncate.
    SDValue ResVal = BSLoad;
    if (N->getValueType(0) == MVT::i16)
      ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);

    // First, combine the bswap away. This makes the value produced by the
    // load dead.
    DCI.CombineTo(N, Res: ResVal);

    // Next, combine the load away, we give it a bogus result value but a real
    // chain result. The result value is dead because the bswap is dead.
    DCI.CombineTo(N: Load.getNode(), Res0: ResVal, Res1: BSLoad.getValue(R: 1));

    // Return N so it doesn't get rechecked!
    return SDValue(N, 0);
  }

  // Look through bitcasts that retain the number of vector elements.
  SDValue Op = N->getOperand(Num: 0);
  if (Op.getOpcode() == ISD::BITCAST &&
      Op.getValueType().isVector() &&
      Op.getOperand(i: 0).getValueType().isVector() &&
      Op.getValueType().getVectorNumElements() ==
        Op.getOperand(i: 0).getValueType().getVectorNumElements())
    Op = Op.getOperand(i: 0);

  // Push BSWAP into a vector insertion if at least one side then simplifies.
  // A side "simplifies" if it is a constant, an undef, another BSWAP (which
  // then cancels), or a load that can become a byte-swapping load.
  if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
    SDValue Vec = Op.getOperand(i: 0);
    SDValue Elt = Op.getOperand(i: 1);
    SDValue Idx = Op.getOperand(i: 2);

    if (DAG.isConstantIntBuildVectorOrConstantInt(N: Vec) ||
        Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(N: Elt) ||
        Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
        (canLoadStoreByteSwapped(VT: N->getValueType(ResNo: 0)) &&
         ISD::isNON_EXTLoad(N: Elt.getNode()) && Elt.hasOneUse())) {
      EVT VecVT = N->getValueType(ResNo: 0);
      EVT EltVT = N->getValueType(ResNo: 0).getVectorElementType();
      // Bitcast both sides to the result types before swapping them.
      if (VecVT != Vec.getValueType()) {
        Vec = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Vec);
        DCI.AddToWorklist(N: Vec.getNode());
      }
      if (EltVT != Elt.getValueType()) {
        Elt = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: EltVT, Operand: Elt);
        DCI.AddToWorklist(N: Elt.getNode());
      }
      Vec = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Vec);
      DCI.AddToWorklist(N: Vec.getNode());
      Elt = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: EltVT, Operand: Elt);
      DCI.AddToWorklist(N: Elt.getNode());
      return DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: SDLoc(N), VT: VecVT,
                         N1: Vec, N2: Elt, N3: Idx);
    }
  }

  // Push BSWAP into a vector shuffle if at least one side then simplifies.
  ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Val&: Op);
  if (SV && Op.hasOneUse()) {
    SDValue Op0 = Op.getOperand(i: 0);
    SDValue Op1 = Op.getOperand(i: 1);

    if (DAG.isConstantIntBuildVectorOrConstantInt(N: Op0) ||
        Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
        DAG.isConstantIntBuildVectorOrConstantInt(N: Op1) ||
        Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
      EVT VecVT = N->getValueType(ResNo: 0);
      // Bitcast both shuffle inputs to the result type before swapping.
      if (VecVT != Op0.getValueType()) {
        Op0 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op0);
        DCI.AddToWorklist(N: Op0.getNode());
      }
      if (VecVT != Op1.getValueType()) {
        Op1 = DAG.getNode(Opcode: ISD::BITCAST, DL: SDLoc(N), VT: VecVT, Operand: Op1);
        DCI.AddToWorklist(N: Op1.getNode());
      }
      Op0 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op0);
      DCI.AddToWorklist(N: Op0.getNode());
      Op1 = DAG.getNode(Opcode: ISD::BSWAP, DL: SDLoc(N), VT: VecVT, Operand: Op1);
      DCI.AddToWorklist(N: Op1.getNode());
      return DAG.getVectorShuffle(VT: VecVT, dl: SDLoc(N), N1: Op0, N2: Op1, Mask: SV->getMask());
    }
  }

  return SDValue();
}
7482 | |
// Try to rewrite a CC test (CCReg tested with CCValid/CCMask) so that it
// tests the CC produced by an earlier instruction directly.  On success,
// CCReg, CCValid and CCMask are updated in place and true is returned.
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
  // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
  // set by the CCReg instruction using the CCValid / CCMask masks,
  // If the CCReg instruction is itself a ICMP testing the condition
  // code set by some other instruction, see whether we can directly
  // use that condition code.

  // Verify that we have an ICMP against some constant.
  if (CCValid != SystemZ::CCMASK_ICMP)
    return false;
  auto *ICmp = CCReg.getNode();
  if (ICmp->getOpcode() != SystemZISD::ICMP)
    return false;
  auto *CompareLHS = ICmp->getOperand(Num: 0).getNode();
  auto *CompareRHS = dyn_cast<ConstantSDNode>(Val: ICmp->getOperand(Num: 1));
  if (!CompareRHS)
    return false;

  // Optimize the case where CompareLHS is a SELECT_CCMASK.
  if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
    // Verify that we have an appropriate mask for a EQ or NE comparison.
    // Invert tracks whether the final CC mask must be complemented.
    bool Invert = false;
    if (CCMask == SystemZ::CCMASK_CMP_NE)
      Invert = !Invert;
    else if (CCMask != SystemZ::CCMASK_CMP_EQ)
      return false;

    // Verify that the ICMP compares against one of select values.
    auto *TrueVal = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 0));
    if (!TrueVal)
      return false;
    auto *FalseVal = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 1));
    if (!FalseVal)
      return false;
    // Comparing against the false value is the same as the inverse of
    // comparing against the true value.
    if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
      Invert = !Invert;
    else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
      return false;

    // Compute the effective CC mask for the new branch or select.
    auto *NewCCValid = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 2));
    auto *NewCCMask = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 3));
    if (!NewCCValid || !NewCCMask)
      return false;
    CCValid = NewCCValid->getZExtValue();
    CCMask = NewCCMask->getZExtValue();
    // Complementing within CCValid implements the logical NOT of the test.
    if (Invert)
      CCMask ^= CCValid;

    // Return the updated CCReg link.
    CCReg = CompareLHS->getOperand(Num: 4);
    return true;
  }

  // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
  if (CompareLHS->getOpcode() == ISD::SRA) {
    // Match the exact shift amounts that sign-extend the CC field out of
    // the IPM result.
    auto *SRACount = dyn_cast<ConstantSDNode>(Val: CompareLHS->getOperand(Num: 1));
    if (!SRACount || SRACount->getZExtValue() != 30)
      return false;
    auto *SHL = CompareLHS->getOperand(Num: 0).getNode();
    if (SHL->getOpcode() != ISD::SHL)
      return false;
    auto *SHLCount = dyn_cast<ConstantSDNode>(Val: SHL->getOperand(Num: 1));
    if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
      return false;
    auto *IPM = SHL->getOperand(Num: 0).getNode();
    if (IPM->getOpcode() != SystemZISD::IPM)
      return false;

    // Avoid introducing CC spills (because SRA would clobber CC).
    if (!CompareLHS->hasOneUse())
      return false;
    // Verify that the ICMP compares against zero.
    if (CompareRHS->getZExtValue() != 0)
      return false;

    // Compute the effective CC mask for the new branch or select.
    CCMask = SystemZ::reverseCCMask(CCMask);

    // Return the updated CCReg link.
    CCReg = IPM->getOperand(Num: 0);
    return true;
  }

  return false;
}
7569 | |
7570 | SDValue SystemZTargetLowering::combineBR_CCMASK( |
7571 | SDNode *N, DAGCombinerInfo &DCI) const { |
7572 | SelectionDAG &DAG = DCI.DAG; |
7573 | |
7574 | // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. |
7575 | auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1)); |
7576 | auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)); |
7577 | if (!CCValid || !CCMask) |
7578 | return SDValue(); |
7579 | |
7580 | int CCValidVal = CCValid->getZExtValue(); |
7581 | int CCMaskVal = CCMask->getZExtValue(); |
7582 | SDValue Chain = N->getOperand(Num: 0); |
7583 | SDValue CCReg = N->getOperand(Num: 4); |
7584 | |
7585 | if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) |
7586 | return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), |
7587 | Chain, |
7588 | DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), |
7589 | DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), |
7590 | N->getOperand(3), CCReg); |
7591 | return SDValue(); |
7592 | } |
7593 | |
7594 | SDValue SystemZTargetLowering::combineSELECT_CCMASK( |
7595 | SDNode *N, DAGCombinerInfo &DCI) const { |
7596 | SelectionDAG &DAG = DCI.DAG; |
7597 | |
7598 | // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. |
7599 | auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2)); |
7600 | auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3)); |
7601 | if (!CCValid || !CCMask) |
7602 | return SDValue(); |
7603 | |
7604 | int CCValidVal = CCValid->getZExtValue(); |
7605 | int CCMaskVal = CCMask->getZExtValue(); |
7606 | SDValue CCReg = N->getOperand(Num: 4); |
7607 | |
7608 | if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) |
7609 | return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), |
7610 | N->getOperand(0), N->getOperand(1), |
7611 | DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), |
7612 | DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), |
7613 | CCReg); |
7614 | return SDValue(); |
7615 | } |
7616 | |
7617 | |
SDValue SystemZTargetLowering::combineGET_CCMASK(
    SDNode *N, DAGCombinerInfo &DCI) const {

  // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
  auto *CCValid = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
  auto *CCMask = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2));
  if (!CCValid || !CCMask)
    return SDValue();
  int CCValidVal = CCValid->getZExtValue();
  int CCMaskVal = CCMask->getZExtValue();

  // The select may be wrapped in a truncate; look through it.
  SDValue Select = N->getOperand(Num: 0);
  if (Select->getOpcode() == ISD::TRUNCATE)
    Select = Select->getOperand(Num: 0);
  if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
    return SDValue();

  auto *SelectCCValid = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 2));
  auto *SelectCCMask = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 3));
  if (!SelectCCValid || !SelectCCMask)
    return SDValue();
  int SelectCCValidVal = SelectCCValid->getZExtValue();
  int SelectCCMaskVal = SelectCCMask->getZExtValue();

  // The select must produce 1/0; for a 0/1 select, complement the mask
  // (within its valid bits) to account for the inverted sense.
  auto *TrueVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 0));
  auto *FalseVal = dyn_cast<ConstantSDNode>(Val: Select->getOperand(Num: 1));
  if (!TrueVal || !FalseVal)
    return SDValue();
  if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
    ;
  else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
    SelectCCMaskVal ^= SelectCCValidVal;
  else
    return SDValue();

  // The select's test must be expressible within the GET_CCMASK's valid
  // bits, and the two masks must agree on those bits.
  if (SelectCCValidVal & ~CCValidVal)
    return SDValue();
  if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
    return SDValue();

  // Use the select's CC source directly.
  return Select->getOperand(Num: 4);
}
7660 | |
7661 | SDValue SystemZTargetLowering::combineIntDIVREM( |
7662 | SDNode *N, DAGCombinerInfo &DCI) const { |
7663 | SelectionDAG &DAG = DCI.DAG; |
7664 | EVT VT = N->getValueType(ResNo: 0); |
7665 | // In the case where the divisor is a vector of constants a cheaper |
7666 | // sequence of instructions can replace the divide. BuildSDIV is called to |
7667 | // do this during DAG combining, but it only succeeds when it can build a |
7668 | // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and |
7669 | // since it is not Legal but Custom it can only happen before |
7670 | // legalization. Therefore we must scalarize this early before Combine |
7671 | // 1. For widened vectors, this is already the result of type legalization. |
7672 | if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && |
7673 | DAG.isConstantIntBuildVectorOrConstantInt(N: N->getOperand(Num: 1))) |
7674 | return DAG.UnrollVectorOp(N); |
7675 | return SDValue(); |
7676 | } |
7677 | |
7678 | SDValue SystemZTargetLowering::combineINTRINSIC( |
7679 | SDNode *N, DAGCombinerInfo &DCI) const { |
7680 | SelectionDAG &DAG = DCI.DAG; |
7681 | |
7682 | unsigned Id = N->getConstantOperandVal(Num: 1); |
7683 | switch (Id) { |
7684 | // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 |
7685 | // or larger is simply a vector load. |
7686 | case Intrinsic::s390_vll: |
7687 | case Intrinsic::s390_vlrl: |
7688 | if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 2))) |
7689 | if (C->getZExtValue() >= 15) |
7690 | return DAG.getLoad(VT: N->getValueType(ResNo: 0), dl: SDLoc(N), Chain: N->getOperand(Num: 0), |
7691 | Ptr: N->getOperand(Num: 3), PtrInfo: MachinePointerInfo()); |
7692 | break; |
7693 | // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. |
7694 | case Intrinsic::s390_vstl: |
7695 | case Intrinsic::s390_vstrl: |
7696 | if (auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 3))) |
7697 | if (C->getZExtValue() >= 15) |
7698 | return DAG.getStore(Chain: N->getOperand(Num: 0), dl: SDLoc(N), Val: N->getOperand(Num: 2), |
7699 | Ptr: N->getOperand(Num: 4), PtrInfo: MachinePointerInfo()); |
7700 | break; |
7701 | } |
7702 | |
7703 | return SDValue(); |
7704 | } |
7705 | |
7706 | SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { |
7707 | if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) |
7708 | return N->getOperand(Num: 0); |
7709 | return N; |
7710 | } |
7711 | |
// Dispatch target-specific DAG combines by node opcode to the dedicated
// combine* helpers above.  Returns a null SDValue when no combine applies.
SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  switch(N->getOpcode()) {
  default: break;
  case ISD::ZERO_EXTEND:        return combineZERO_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND:        return combineSIGN_EXTEND(N, DCI);
  case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
  case SystemZISD::MERGE_HIGH:
  case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
  case ISD::LOAD:               return combineLOAD(N, DCI);
  case ISD::STORE:              return combineSTORE(N, DCI);
  case ISD::VECTOR_SHUFFLE:     return combineVECTOR_SHUFFLE(N, DCI);
  case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
  case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
  case ISD::STRICT_FP_ROUND:
  case ISD::FP_ROUND:           return combineFP_ROUND(N, DCI);
  case ISD::STRICT_FP_EXTEND:
  case ISD::FP_EXTEND:          return combineFP_EXTEND(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:         return combineINT_TO_FP(N, DCI);
  case ISD::BSWAP:              return combineBSWAP(N, DCI);
  case SystemZISD::BR_CCMASK:   return combineBR_CCMASK(N, DCI);
  case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
  case SystemZISD::GET_CCMASK:  return combineGET_CCMASK(N, DCI);
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::SREM:
  case ISD::UREM:               return combineIntDIVREM(N, DCI);
  case ISD::INTRINSIC_W_CHAIN:
  case ISD::INTRINSIC_VOID:     return combineINTRINSIC(N, DCI);
  }

  return SDValue();
}
7746 | |
// Return the demanded elements for the OpNo source operand of Op. DemandedElts
// are for Op.
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
                                    unsigned OpNo) {
  EVT VT = Op.getValueType();
  unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
  APInt SrcDemE;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = Op.getConstantOperandVal(i: 0);
    switch (Id) {
    case Intrinsic::s390_vpksh:   // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs:  // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh:  // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
      // VECTOR PACK truncates the elements of two source vectors into one.
      // Operand 1 supplies the low half of the result's elements, operand 2
      // the high half; shift/truncate accordingly.
      SrcDemE = DemandedElts;
      if (OpNo == 2)
        SrcDemE.lshrInPlace(ShiftAmt: NumElts / 2);
      SrcDemE = SrcDemE.trunc(width: NumElts / 2);
      break;
      // VECTOR UNPACK extends half the elements of the source vector.
    case Intrinsic::s390_vuphb:  // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
      // The high unpack reads the first NumElts elements of the source.
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: 0);
      break;
    case Intrinsic::s390_vuplb:  // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      // The low unpack reads the last NumElts elements of the source.
      SrcDemE = APInt(NumElts * 2, 0);
      SrcDemE.insertBits(SubBits: DemandedElts, bitPosition: NumElts);
      break;
    case Intrinsic::s390_vpdi: {
      // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
      SrcDemE = APInt(NumElts, 0);
      if (!DemandedElts[OpNo - 1])
        break;
      unsigned Mask = Op.getConstantOperandVal(i: 3);
      // Bit 4 of the immediate selects for operand 1, bit 1 for operand 2.
      unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
      // Demand input element 0 or 1, given by the mask bit value.
      SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
      break;
    }
    case Intrinsic::s390_vsldb: {
      // VECTOR SHIFT LEFT DOUBLE BY BYTE
      assert(VT == MVT::v16i8 && "Unexpected type." );
      // The result is the last (16 - FirstIdx) bytes of operand 1 followed
      // by the first FirstIdx bytes of operand 2.
      unsigned FirstIdx = Op.getConstantOperandVal(i: 3);
      assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand." );
      unsigned NumSrc0Els = 16 - FirstIdx;
      SrcDemE = APInt(NumElts, 0);
      if (OpNo == 1) {
        APInt DemEls = DemandedElts.trunc(width: NumSrc0Els);
        SrcDemE.insertBits(SubBits: DemEls, bitPosition: FirstIdx);
      } else {
        APInt DemEls = DemandedElts.lshr(shiftAmt: NumSrc0Els);
        SrcDemE.insertBits(SubBits: DemEls, bitPosition: 0);
      }
      break;
    }
    case Intrinsic::s390_vperm:
      // The permute vector is not analyzed; conservatively demand all
      // source elements.
      SrcDemE = APInt(NumElts, -1);
      break;
    default:
      llvm_unreachable("Unhandled intrinsic." );
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
      // Scalar operand.
      SrcDemE = APInt(1, 1);
      break;
    case SystemZISD::SELECT_CCMASK:
      // Both select arms have the same element layout as the result.
      SrcDemE = DemandedElts;
      break;
    default:
      llvm_unreachable("Unhandled opcode." );
      break;
    }
  }
  return SrcDemE;
}
7845 | |
7846 | static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, |
7847 | const APInt &DemandedElts, |
7848 | const SelectionDAG &DAG, unsigned Depth, |
7849 | unsigned OpNo) { |
7850 | APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); |
7851 | APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1); |
7852 | KnownBits LHSKnown = |
7853 | DAG.computeKnownBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1); |
7854 | KnownBits RHSKnown = |
7855 | DAG.computeKnownBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1); |
7856 | Known = LHSKnown.intersectWith(RHS: RHSKnown); |
7857 | } |
7858 | |
// Target hook: compute the known zero/one bits of a SystemZ-specific node
// (or a SystemZ vector intrinsic wrapped in ISD::INTRINSIC_WO_CHAIN) for
// the demanded vector elements.
void
SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                     KnownBits &Known,
                                                     const APInt &DemandedElts,
                                                     const SelectionDAG &DAG,
                                                     unsigned Depth) const {
  Known.resetAll();

  // Intrinsic CC result is returned in the two low bits.
  unsigned tmp0, tmp1; // not used
  if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Opcode&: tmp0, CCValid&: tmp1)) {
    Known.Zero.setBitsFrom(2);
    return;
  }
  EVT VT = Op.getValueType();
  // Nothing useful is known about secondary results or untyped values.
  if (Op.getResNo() != 0 || VT == MVT::Untyped)
    return;
  assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
          "KnownBits does not match VT in bitwidth");
  assert ((!VT.isVector() ||
           (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
          "DemandedElts does not match VT number of elements");
  unsigned BitWidth = Known.getBitWidth();
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    bool IsLogical = false;
    unsigned Id = Op.getConstantOperandVal(i: 0);
    switch (Id) {
    case Intrinsic::s390_vpksh: // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs: // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh: // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      // Binary-style intrinsics: combine the two vector operands, which
      // start at operand 1 (operand 0 is the intrinsic ID).
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 1);
      break;
    case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
    case Intrinsic::s390_vuplhh:
    case Intrinsic::s390_vuplhf:
    case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
    case Intrinsic::s390_vupllh:
    case Intrinsic::s390_vupllf:
      IsLogical = true;
      [[fallthrough]];
    case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue SrcOp = Op.getOperand(i: 1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 0);
      Known = DAG.computeKnownBits(Op: SrcOp, DemandedElts: SrcDemE, Depth: Depth + 1);
      // Unpacks widen each source element: logical unpacks zero-extend,
      // arithmetic unpacks sign-extend.
      if (IsLogical) {
        Known = Known.zext(BitWidth);
      } else
        Known = Known.sext(BitWidth);
      break;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::JOIN_DWORDS:
    case SystemZISD::SELECT_CCMASK:
      // Binary-style target nodes: sources start at operand 0.
      computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, OpNo: 0);
      break;
    case SystemZISD::REPLICATE: {
      SDValue SrcOp = Op.getOperand(i: 0);
      Known = DAG.computeKnownBits(Op: SrcOp, Depth: Depth + 1);
      if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(Val: SrcOp))
        Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
      break;
    }
    default:
      break;
    }
  }

  // Known has the width of the source operand(s). Adjust if needed to match
  // the passed bitwidth.
  if (Known.getBitWidth() != BitWidth)
    Known = Known.anyextOrTrunc(BitWidth);
}
7953 | |
7954 | static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, |
7955 | const SelectionDAG &DAG, unsigned Depth, |
7956 | unsigned OpNo) { |
7957 | APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); |
7958 | unsigned LHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo), DemandedElts: Src0DemE, Depth: Depth + 1); |
7959 | if (LHS == 1) return 1; // Early out. |
7960 | APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo: OpNo + 1); |
7961 | unsigned RHS = DAG.ComputeNumSignBits(Op: Op.getOperand(i: OpNo + 1), DemandedElts: Src1DemE, Depth: Depth + 1); |
7962 | if (RHS == 1) return 1; // Early out. |
7963 | unsigned Common = std::min(a: LHS, b: RHS); |
7964 | unsigned SrcBitWidth = Op.getOperand(i: OpNo).getScalarValueSizeInBits(); |
7965 | EVT VT = Op.getValueType(); |
7966 | unsigned VTBits = VT.getScalarSizeInBits(); |
7967 | if (SrcBitWidth > VTBits) { // PACK |
7968 | unsigned = SrcBitWidth - VTBits; |
7969 | if (Common > SrcExtraBits) |
7970 | return (Common - SrcExtraBits); |
7971 | return 1; |
7972 | } |
7973 | assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth." ); |
7974 | return Common; |
7975 | } |
7976 | |
// Target hook: compute the number of known sign bits of a SystemZ-specific
// node (or SystemZ vector intrinsic) for the demanded vector elements.
unsigned
SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  // Only the first result carries a meaningful value.
  if (Op.getResNo() != 0)
    return 1;
  unsigned Opcode = Op.getOpcode();
  if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
    unsigned Id = Op.getConstantOperandVal(i: 0);
    switch (Id) {
    case Intrinsic::s390_vpksh: // PACKS
    case Intrinsic::s390_vpksf:
    case Intrinsic::s390_vpksg:
    case Intrinsic::s390_vpkshs: // PACKS_CC
    case Intrinsic::s390_vpksfs:
    case Intrinsic::s390_vpksgs:
    case Intrinsic::s390_vpklsh: // PACKLS
    case Intrinsic::s390_vpklsf:
    case Intrinsic::s390_vpklsg:
    case Intrinsic::s390_vpklshs: // PACKLS_CC
    case Intrinsic::s390_vpklsfs:
    case Intrinsic::s390_vpklsgs:
    case Intrinsic::s390_vpdi:
    case Intrinsic::s390_vsldb:
    case Intrinsic::s390_vperm:
      // Binary-style intrinsics: the vector operands start at operand 1
      // (operand 0 is the intrinsic ID).
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 1);
    case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
    case Intrinsic::s390_vuphh:
    case Intrinsic::s390_vuphf:
    case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
    case Intrinsic::s390_vuplhw:
    case Intrinsic::s390_vuplf: {
      SDValue PackedOp = Op.getOperand(i: 1);
      APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, OpNo: 1);
      unsigned Tmp = DAG.ComputeNumSignBits(Op: PackedOp, DemandedElts: SrcDemE, Depth: Depth + 1);
      EVT VT = Op.getValueType();
      unsigned VTBits = VT.getScalarSizeInBits();
      // Sign-extending unpacks add one known sign bit for every bit by
      // which the element is widened.
      Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
      return Tmp;
    }
    default:
      break;
    }
  } else {
    switch (Opcode) {
    case SystemZISD::SELECT_CCMASK:
      return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, OpNo: 0);
    default:
      break;
    }
  }

  // Conservative default: only the sign bit itself is known.
  return 1;
}
8031 | |
8032 | bool SystemZTargetLowering:: |
8033 | isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, |
8034 | const APInt &DemandedElts, const SelectionDAG &DAG, |
8035 | bool PoisonOnly, unsigned Depth) const { |
8036 | switch (Op->getOpcode()) { |
8037 | case SystemZISD::PCREL_WRAPPER: |
8038 | case SystemZISD::PCREL_OFFSET: |
8039 | return true; |
8040 | } |
8041 | return false; |
8042 | } |
8043 | |
8044 | unsigned |
8045 | SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { |
8046 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
8047 | unsigned StackAlign = TFI->getStackAlignment(); |
8048 | assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && |
8049 | "Unexpected stack alignment" ); |
8050 | // The default stack probe size is 4096 if the function has no |
8051 | // stack-probe-size attribute. |
8052 | unsigned StackProbeSize = |
8053 | MF.getFunction().getFnAttributeAsParsedInteger(Kind: "stack-probe-size" , Default: 4096); |
8054 | // Round down to the stack alignment. |
8055 | StackProbeSize &= ~(StackAlign - 1); |
8056 | return StackProbeSize ? StackProbeSize : StackAlign; |
8057 | } |
8058 | |
8059 | //===----------------------------------------------------------------------===// |
8060 | // Custom insertion |
8061 | //===----------------------------------------------------------------------===// |
8062 | |
8063 | // Force base value Base into a register before MI. Return the register. |
8064 | static Register forceReg(MachineInstr &MI, MachineOperand &Base, |
8065 | const SystemZInstrInfo *TII) { |
8066 | MachineBasicBlock *MBB = MI.getParent(); |
8067 | MachineFunction &MF = *MBB->getParent(); |
8068 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
8069 | |
8070 | if (Base.isReg()) { |
8071 | // Copy Base into a new virtual register to help register coalescing in |
8072 | // cases with multiple uses. |
8073 | Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
8074 | BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) |
8075 | .add(Base); |
8076 | return Reg; |
8077 | } |
8078 | |
8079 | Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
8080 | BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) |
8081 | .add(Base) |
8082 | .addImm(0) |
8083 | .addReg(0); |
8084 | return Reg; |
8085 | } |
8086 | |
8087 | // The CC operand of MI might be missing a kill marker because there |
8088 | // were multiple uses of CC, and ISel didn't know which to mark. |
8089 | // Figure out whether MI should have had a kill marker. |
8090 | static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { |
8091 | // Scan forward through BB for a use/def of CC. |
8092 | MachineBasicBlock::iterator miI(std::next(x: MachineBasicBlock::iterator(MI))); |
8093 | for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { |
8094 | const MachineInstr& mi = *miI; |
8095 | if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) |
8096 | return false; |
8097 | if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) |
8098 | break; // Should have kill-flag - update below. |
8099 | } |
8100 | |
8101 | // If we hit the end of the block, check whether CC is live into a |
8102 | // successor. |
8103 | if (miI == MBB->end()) { |
8104 | for (const MachineBasicBlock *Succ : MBB->successors()) |
8105 | if (Succ->isLiveIn(SystemZ::CC)) |
8106 | return false; |
8107 | } |
8108 | |
8109 | return true; |
8110 | } |
8111 | |
8112 | // Return true if it is OK for this Select pseudo-opcode to be cascaded |
8113 | // together with other Select pseudo-opcodes into a single basic-block with |
8114 | // a conditional jump around it. |
8115 | static bool isSelectPseudo(MachineInstr &MI) { |
8116 | switch (MI.getOpcode()) { |
8117 | case SystemZ::Select32: |
8118 | case SystemZ::Select64: |
8119 | case SystemZ::Select128: |
8120 | case SystemZ::SelectF32: |
8121 | case SystemZ::SelectF64: |
8122 | case SystemZ::SelectF128: |
8123 | case SystemZ::SelectVR32: |
8124 | case SystemZ::SelectVR64: |
8125 | case SystemZ::SelectVR128: |
8126 | return true; |
8127 | |
8128 | default: |
8129 | return false; |
8130 | } |
8131 | } |
8132 | |
// Helper function, which inserts PHI functions into SinkMBB:
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
// All Selects must share the same CCValid/CCMask pair (possibly with the
// mask inverted), as established by the caller (emitSelect).
static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
                                 MachineBasicBlock *TrueMBB,
                                 MachineBasicBlock *FalseMBB,
                                 MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();

  // Operands 3 and 4 of a Select pseudo are the CCValid/CCMask pair that
  // the branch in emitSelect was generated from.
  MachineInstr *FirstMI = Selects.front();
  unsigned CCValid = FirstMI->getOperand(i: 3).getImm();
  unsigned CCMask = FirstMI->getOperand(i: 4).getImm();

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // As we are creating the PHIs, we have to be careful if there is more than
  // one.  Later Selects may reference the results of earlier Selects, but later
  // PHIs have to reference the individual true/false inputs from earlier PHIs.
  // That also means that PHI construction must work forward from earlier to
  // later, and that the code must maintain a mapping from earlier PHI's
  // destination registers, and the registers that went into the PHI.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;

  for (auto *MI : Selects) {
    Register DestReg = MI->getOperand(i: 0).getReg();
    Register TrueReg = MI->getOperand(i: 1).getReg();
    Register FalseReg = MI->getOperand(i: 2).getReg();

    // If this Select we are generating is the opposite condition from
    // the jump we generated, then we have to swap the operands for the
    // PHI that is going to be generated.
    if (MI->getOperand(i: 4).getImm() == (CCValid ^ CCMask))
      std::swap(a&: TrueReg, b&: FalseReg);

    // Rewrite inputs that were produced by an earlier Select in this batch
    // to the corresponding true/false input of that Select's PHI.
    if (RegRewriteTable.contains(Val: TrueReg))
      TrueReg = RegRewriteTable[TrueReg].first;

    if (RegRewriteTable.contains(Val: FalseReg))
      FalseReg = RegRewriteTable[FalseReg].second;

    DebugLoc DL = MI->getDebugLoc();
    BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
      .addReg(TrueReg).addMBB(TrueMBB)
      .addReg(FalseReg).addMBB(FalseMBB);

    // Add this PHI to the rewrite table.
    RegRewriteTable[DestReg] = std::make_pair(x&: TrueReg, y&: FalseReg);
  }

  // We just introduced PHIs, so the function no longer satisfies NoPHIs.
  MF->getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs);
}
8185 | |
8186 | MachineBasicBlock * |
8187 | SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, |
8188 | MachineBasicBlock *BB) const { |
8189 | MachineFunction &MF = *BB->getParent(); |
8190 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
8191 | auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); |
8192 | assert(TFL->hasReservedCallFrame(MF) && |
8193 | "ADJSTACKDOWN and ADJSTACKUP should be no-ops" ); |
8194 | (void)TFL; |
8195 | // Get the MaxCallFrameSize value and erase MI since it serves no further |
8196 | // purpose as the call frame is statically reserved in the prolog. Set |
8197 | // AdjustsStack as MI is *not* mapped as a frame instruction. |
8198 | uint32_t NumBytes = MI.getOperand(i: 0).getImm(); |
8199 | if (NumBytes > MFI.getMaxCallFrameSize()) |
8200 | MFI.setMaxCallFrameSize(NumBytes); |
8201 | MFI.setAdjustsStack(true); |
8202 | |
8203 | MI.eraseFromParent(); |
8204 | return BB; |
8205 | } |
8206 | |
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
//
// Expands MI (and any immediately following Select* pseudos that test the
// same CC value) into a diamond:
//   StartMBB: BRC to JoinMBB (condition true)
//   FalseMBB: fallthrough
//   JoinMBB:  PHIs merging the true/false inputs
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const {
  assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  unsigned CCValid = MI.getOperand(i: 3).getImm();
  unsigned CCMask = MI.getOperand(i: 4).getImm();

  // If we have a sequence of Select* pseudo instructions using the
  // same condition code value, we want to expand all of them into
  // a single pair of basic blocks using the same condition.
  SmallVector<MachineInstr*, 8> Selects;
  SmallVector<MachineInstr*, 8> DbgValues;
  Selects.push_back(Elt: &MI);
  unsigned Count = 0;
  // Scan forward from MI, collecting compatible Selects.  Stop at anything
  // that redefines CC, needs its own custom insertion, or reads a result of
  // one of the collected Selects (those results only exist after the PHIs).
  for (MachineInstr &NextMI : llvm::make_range(
           x: std::next(x: MachineBasicBlock::iterator(MI)), y: MBB->end())) {
    if (isSelectPseudo(MI&: NextMI)) {
      assert(NextMI.getOperand(3).getImm() == CCValid &&
             "Bad CCValid operands since CC was not redefined.");
      // Accept both the same condition and its exact inverse; the inverse
      // is handled by swapping PHI operands in createPHIsForSelects().
      if (NextMI.getOperand(i: 4).getImm() == CCMask ||
          NextMI.getOperand(i: 4).getImm() == (CCValid ^ CCMask)) {
        Selects.push_back(Elt: &NextMI);
        continue;
      }
      break;
    }
    if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
        NextMI.usesCustomInsertionHook())
      break;
    bool User = false;
    for (auto *SelMI : Selects)
      if (NextMI.readsVirtualRegister(Reg: SelMI->getOperand(i: 0).getReg())) {
        User = true;
        break;
      }
    if (NextMI.isDebugInstr()) {
      if (User) {
        // Debug values of a Select result must move below the PHIs.
        assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
        DbgValues.push_back(Elt: &NextMI);
      }
    } else if (User || ++Count > 20)
      // Cap the scan at 20 unrelated instructions to bound compile time.
      break;
  }

  MachineInstr *LastMI = Selects.back();
  bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
                   checkCCKill(*LastMI, MBB));
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI: LastMI, MBB);
  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB);

  // Unless CC was killed in the last Select instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!CCKilled) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(Succ: JoinMBB);
  MBB->addSuccessor(Succ: FalseMBB);

  //  FalseMBB:
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  MBB->addSuccessor(Succ: JoinMBB);

  //  JoinMBB:
  //   %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
  //  ...
  MBB = JoinMBB;
  createPHIsForSelects(Selects, TrueMBB: StartMBB, FalseMBB, SinkMBB: MBB);
  for (auto *SelMI : Selects)
    SelMI->eraseFromParent();

  // Re-home collected debug values after the PHIs in the join block.
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  for (auto *DbgMI : DbgValues)
    MBB->splice(Where: InsertPos, Other: StartMBB, From: DbgMI);

  return JoinMBB;
}
8296 | |
// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
// StoreOpcode is the store to use and Invert says whether the store should
// happen when the condition is false rather than true.  If a STORE ON
// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
                                                        MachineBasicBlock *MBB,
                                                        unsigned StoreOpcode,
                                                        unsigned STOCOpcode,
                                                        bool Invert) const {
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();

  // Operands: value, base, displacement, index, CCValid, CCMask.
  Register SrcReg = MI.getOperand(i: 0).getReg();
  MachineOperand Base = MI.getOperand(i: 1);
  int64_t Disp = MI.getOperand(i: 2).getImm();
  Register IndexReg = MI.getOperand(i: 3).getReg();
  unsigned CCValid = MI.getOperand(i: 4).getImm();
  unsigned CCMask = MI.getOperand(i: 5).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Pick a store form whose displacement range covers Disp.
  StoreOpcode = TII->getOpcodeForOffset(Opcode: StoreOpcode, Offset: Disp);

  // ISel pattern matching also adds a load memory operand of the same
  // address, so take special care to find the storing memory operand.
  MachineMemOperand *MMO = nullptr;
  for (auto *I : MI.memoperands())
    if (I->isStore()) {
      MMO = I;
      break;
    }

  // Use STOCOpcode if possible.  We could use different store patterns in
  // order to avoid matching the index register, but the performance trade-offs
  // might be more complicated in that case.
  if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
    // STOC stores when the condition is true, so invert the mask if asked
    // to store on the false condition.
    if (Invert)
      CCMask ^= CCValid;

    BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
        .addReg(SrcReg)
        .add(Base)
        .addImm(Disp)
        .addImm(CCValid)
        .addImm(CCMask)
        .addMemOperand(MMO);

    MI.eraseFromParent();
    return MBB;
  }

  // Get the condition needed to branch around the store.
  if (!Invert)
    CCMask ^= CCValid;

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(MBB: StartMBB);

  // Unless CC was killed in the CondStore instruction, mark it as
  // live-in to both FalseMBB and JoinMBB.
  if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
      !checkCCKill(MI, JoinMBB)) {
    FalseMBB->addLiveIn(SystemZ::CC);
    JoinMBB->addLiveIn(SystemZ::CC);
  }

  //  StartMBB:
  //   BRC CCMask, JoinMBB
  //   # fallthrough to FalseMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
  MBB->addSuccessor(Succ: JoinMBB);
  MBB->addSuccessor(Succ: FalseMBB);

  //  FalseMBB:
  //   store %SrcReg, %Disp(%Index,%Base)
  //   # fallthrough to JoinMBB
  MBB = FalseMBB;
  BuildMI(MBB, DL, TII->get(StoreOpcode))
      .addReg(SrcReg)
      .add(Base)
      .addImm(Disp)
      .addReg(IndexReg)
      .addMemOperand(MMO);
  MBB->addSuccessor(Succ: JoinMBB);

  MI.eraseFromParent();
  return JoinMBB;
}
8386 | |
// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
                                     MachineBasicBlock *MBB,
                                     bool Unsigned) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Synthetic instruction to compare 128-bit values.
  // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
  Register Op0 = MI.getOperand(i: 0).getReg();
  Register Op1 = MI.getOperand(i: 1).getReg();

  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *JoinMBB  = SystemZ::splitBlockAfter(MI, MBB);
  MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(MBB: StartMBB);

  //  StartMBB:
  //
  //  Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
  //  Swap the inputs to get:
  //    CC 1 if high(Op0) > high(Op1)
  //    CC 2 if high(Op0) < high(Op1)
  //    CC 0 if high(Op0) == high(Op1)
  //
  //  If CC != 0, we're done, so jump over the next instruction.
  //
  //   VEC[L]G Op1, Op0
  //   JNE JoinMBB
  //   # fallthrough to HiEqMBB
  MBB = StartMBB;
  // Signed pseudos compare the high halves with VECG, unsigned with VECLG.
  int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
  BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
    .addReg(Op1).addReg(Op0);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
  MBB->addSuccessor(Succ: JoinMBB);
  MBB->addSuccessor(Succ: HiEqMBB);

  //  HiEqMBB:
  //
  //  Otherwise, use VECTOR COMPARE HIGH LOGICAL.
  //  Since we already know the high parts are equal, the CC
  //  result will only depend on the low parts:
  //     CC 1 if low(Op0) > low(Op1)
  //     CC 3 if low(Op0) <= low(Op1)
  //
  //   VCHLGS Tmp, Op0, Op1
  //   # fallthrough to JoinMBB
  MBB = HiEqMBB;
  Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
  BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
    .addReg(Op0).addReg(Op1);
  MBB->addSuccessor(Succ: JoinMBB);

  // Mark CC as live-in to JoinMBB.
  JoinMBB->addLiveIn(SystemZ::CC);

  MI.eraseFromParent();
  return JoinMBB;
}
8449 | |
// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
// ATOMIC_SWAPW instruction MI.  BinOpcode is the instruction that performs
// the binary operation elided by "*", or 0 for ATOMIC_SWAPW.  Invert says
// whether the field should be inverted after performing BinOpcode (e.g. for
// NAND).
//
// The expansion is a compare-and-swap loop: the containing aligned word is
// loaded, the subword field is rotated into position, operated on, rotated
// back, and CS retries until the word is unchanged by other CPUs.
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
    bool Invert) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  // Src2 can be a register or immediate.
  Register Dest = MI.getOperand(i: 0).getReg();
  MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1));
  int64_t Disp = MI.getOperand(i: 2).getImm();
  MachineOperand Src2 = earlyUseOperand(Op: MI.getOperand(i: 3));
  Register BitShift = MI.getOperand(i: 4).getReg();
  Register NegBitShift = MI.getOperand(i: 5).getReg();
  unsigned BitSize = MI.getOperand(i: 6).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register OldVal        = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register NewVal        = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);

  // Insert a basic block for the main loop.
  MachineBasicBlock *StartMBB = MBB;
  MachineBasicBlock *DoneMBB  = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB  = SystemZ::emitBlockAfter(MBB: StartMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(Succ: LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   %RotatedNewVal = OP %RotatedOldVal, %Src2
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(LoopMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
    .addReg(OldVal).addReg(BitShift).addImm(0);
  if (Invert) {
    // Perform the operation normally and then invert every bit of the field.
    Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
    BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
    // XILF with the upper BitSize bits set (the rotated field occupies the
    // high bits of the word, so only those bits are inverted).
    BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
      .addReg(Tmp).addImm(-1U << (32 - BitSize));
  } else if (BinOpcode)
    // A simply binary operation.
    BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
        .addReg(RotatedOldVal)
        .add(Src2);
  else
    // Use RISBG to rotate Src2 into position and use it to replace the
    // field in RotatedOldVal (this is the ATOMIC_SWAPW case).
    BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
      .addReg(RotatedOldVal).addReg(Src2.getReg())
      .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
    .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(Succ: LoopMBB);
  MBB->addSuccessor(Succ: DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
8545 | |
// Implement EmitInstrWithCustomInserter for subword pseudo
// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI.  CompareOpcode is the
// instruction that should be used to compare the current field with the
// minimum or maximum value.  KeepOldMask is the BRC condition-code mask
// for when the current field should be kept.
//
// Like emitAtomicLoadBinary this is a compare-and-swap loop, but with a
// comparison and a conditional branch selecting between keeping the old
// field value (LoopMBB -> UpdateMBB) and substituting Src2 (UseAltMBB).
MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
    MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
    unsigned KeepOldMask) const {
  MachineFunction &MF = *MBB->getParent();
  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Extract the operands.  Base can be a register or a frame index.
  Register Dest = MI.getOperand(i: 0).getReg();
  MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1));
  int64_t Disp = MI.getOperand(i: 2).getImm();
  Register Src2 = MI.getOperand(i: 3).getReg();
  Register BitShift = MI.getOperand(i: 4).getReg();
  Register NegBitShift = MI.getOperand(i: 5).getReg();
  unsigned BitSize = MI.getOperand(i: 6).getImm();
  DebugLoc DL = MI.getDebugLoc();

  // Get the right opcodes for the displacement.
  unsigned LOpcode  = TII->getOpcodeForOffset(SystemZ::L,  Disp);
  unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
  assert(LOpcode && CSOpcode && "Displacement out of range");

  // Create virtual registers for temporary results.
  Register OrigVal       = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register OldVal        = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register NewVal        = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
  Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);

  // Insert 3 basic blocks for the loop.
  MachineBasicBlock *StartMBB  = MBB;
  MachineBasicBlock *DoneMBB   = SystemZ::splitBlockBefore(MI, MBB);
  MachineBasicBlock *LoopMBB   = SystemZ::emitBlockAfter(MBB: StartMBB);
  MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(MBB: LoopMBB);
  MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(MBB: UseAltMBB);

  //  StartMBB:
  //   ...
  //   %OrigVal     = L Disp(%Base)
  //   # fall through to LoopMBB
  MBB = StartMBB;
  BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
  MBB->addSuccessor(Succ: LoopMBB);

  //  LoopMBB:
  //   %OldVal        = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
  //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
  //   CompareOpcode %RotatedOldVal, %Src2
  //   BRC KeepOldMask, UpdateMBB
  MBB = LoopMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
    .addReg(OrigVal).addMBB(StartMBB)
    .addReg(Dest).addMBB(UpdateMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
    .addReg(OldVal).addReg(BitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CompareOpcode))
    .addReg(RotatedOldVal).addReg(Src2);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
  MBB->addSuccessor(Succ: UpdateMBB);
  MBB->addSuccessor(Succ: UseAltMBB);

  //  UseAltMBB:
  //   %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
  //   # fall through to UpdateMBB
  MBB = UseAltMBB;
  // Insert the BitSize high bits of Src2 into the field position.
  BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
    .addReg(RotatedOldVal).addReg(Src2)
    .addImm(32).addImm(31 + BitSize).addImm(0);
  MBB->addSuccessor(Succ: UpdateMBB);

  //  UpdateMBB:
  //   %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
  //                        [ %RotatedAltVal, UseAltMBB ]
  //   %NewVal        = RLL %RotatedNewVal, 0(%NegBitShift)
  //   %Dest          = CS %OldVal, %NewVal, Disp(%Base)
  //   JNE LoopMBB
  //   # fall through to DoneMBB
  MBB = UpdateMBB;
  BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
    .addReg(RotatedOldVal).addMBB(LoopMBB)
    .addReg(RotatedAltVal).addMBB(UseAltMBB);
  BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
    .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
  BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
      .addReg(OldVal)
      .addReg(NewVal)
      .add(Base)
      .addImm(Disp);
  BuildMI(MBB, DL, TII->get(SystemZ::BRC))
    .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
  MBB->addSuccessor(Succ: LoopMBB);
  MBB->addSuccessor(Succ: DoneMBB);

  MI.eraseFromParent();
  return DoneMBB;
}
8649 | |
8650 | // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW |
8651 | // instruction MI. |
8652 | MachineBasicBlock * |
8653 | SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, |
8654 | MachineBasicBlock *MBB) const { |
8655 | MachineFunction &MF = *MBB->getParent(); |
8656 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
8657 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
8658 | |
8659 | // Extract the operands. Base can be a register or a frame index. |
8660 | Register Dest = MI.getOperand(i: 0).getReg(); |
8661 | MachineOperand Base = earlyUseOperand(Op: MI.getOperand(i: 1)); |
8662 | int64_t Disp = MI.getOperand(i: 2).getImm(); |
8663 | Register CmpVal = MI.getOperand(i: 3).getReg(); |
8664 | Register OrigSwapVal = MI.getOperand(i: 4).getReg(); |
8665 | Register BitShift = MI.getOperand(i: 5).getReg(); |
8666 | Register NegBitShift = MI.getOperand(i: 6).getReg(); |
8667 | int64_t BitSize = MI.getOperand(i: 7).getImm(); |
8668 | DebugLoc DL = MI.getDebugLoc(); |
8669 | |
8670 | const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; |
8671 | |
8672 | // Get the right opcodes for the displacement and zero-extension. |
8673 | unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); |
8674 | unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); |
8675 | unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; |
8676 | assert(LOpcode && CSOpcode && "Displacement out of range" ); |
8677 | |
8678 | // Create virtual registers for temporary results. |
8679 | Register OrigOldVal = MRI.createVirtualRegister(RegClass: RC); |
8680 | Register OldVal = MRI.createVirtualRegister(RegClass: RC); |
8681 | Register SwapVal = MRI.createVirtualRegister(RegClass: RC); |
8682 | Register StoreVal = MRI.createVirtualRegister(RegClass: RC); |
8683 | Register OldValRot = MRI.createVirtualRegister(RegClass: RC); |
8684 | Register RetryOldVal = MRI.createVirtualRegister(RegClass: RC); |
8685 | Register RetrySwapVal = MRI.createVirtualRegister(RegClass: RC); |
8686 | |
8687 | // Insert 2 basic blocks for the loop. |
8688 | MachineBasicBlock *StartMBB = MBB; |
8689 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
8690 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
8691 | MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(MBB: LoopMBB); |
8692 | |
8693 | // StartMBB: |
8694 | // ... |
8695 | // %OrigOldVal = L Disp(%Base) |
8696 | // # fall through to LoopMBB |
8697 | MBB = StartMBB; |
8698 | BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) |
8699 | .add(Base) |
8700 | .addImm(Disp) |
8701 | .addReg(0); |
8702 | MBB->addSuccessor(Succ: LoopMBB); |
8703 | |
8704 | // LoopMBB: |
8705 | // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] |
8706 | // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] |
8707 | // %OldValRot = RLL %OldVal, BitSize(%BitShift) |
8708 | // ^^ The low BitSize bits contain the field |
8709 | // of interest. |
8710 | // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 |
8711 | // ^^ Replace the upper 32-BitSize bits of the |
8712 | // swap value with those that we loaded and rotated. |
8713 | // %Dest = LL[CH] %OldValRot |
8714 | // CR %Dest, %CmpVal |
8715 | // JNE DoneMBB |
8716 | // # Fall through to SetMBB |
8717 | MBB = LoopMBB; |
8718 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) |
8719 | .addReg(OrigOldVal).addMBB(StartMBB) |
8720 | .addReg(RetryOldVal).addMBB(SetMBB); |
8721 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) |
8722 | .addReg(OrigSwapVal).addMBB(StartMBB) |
8723 | .addReg(RetrySwapVal).addMBB(SetMBB); |
8724 | BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) |
8725 | .addReg(OldVal).addReg(BitShift).addImm(BitSize); |
8726 | BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) |
8727 | .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); |
8728 | BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) |
8729 | .addReg(OldValRot); |
8730 | BuildMI(MBB, DL, TII->get(SystemZ::CR)) |
8731 | .addReg(Dest).addReg(CmpVal); |
8732 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
8733 | .addImm(SystemZ::CCMASK_ICMP) |
8734 | .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); |
8735 | MBB->addSuccessor(Succ: DoneMBB); |
8736 | MBB->addSuccessor(Succ: SetMBB); |
8737 | |
8738 | // SetMBB: |
8739 | // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) |
8740 | // ^^ Rotate the new field to its proper position. |
8741 | // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) |
8742 | // JNE LoopMBB |
8743 | // # fall through to ExitMBB |
8744 | MBB = SetMBB; |
8745 | BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) |
8746 | .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); |
8747 | BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) |
8748 | .addReg(OldVal) |
8749 | .addReg(StoreVal) |
8750 | .add(Base) |
8751 | .addImm(Disp); |
8752 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
8753 | .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); |
8754 | MBB->addSuccessor(Succ: LoopMBB); |
8755 | MBB->addSuccessor(Succ: DoneMBB); |
8756 | |
8757 | // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in |
8758 | // to the block after the loop. At this point, CC may have been defined |
8759 | // either by the CR in LoopMBB or by the CS in SetMBB. |
8760 | if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) |
8761 | DoneMBB->addLiveIn(SystemZ::CC); |
8762 | |
8763 | MI.eraseFromParent(); |
8764 | return DoneMBB; |
8765 | } |
8766 | |
8767 | // Emit a move from two GR64s to a GR128. |
8768 | MachineBasicBlock * |
8769 | SystemZTargetLowering::emitPair128(MachineInstr &MI, |
8770 | MachineBasicBlock *MBB) const { |
8771 | MachineFunction &MF = *MBB->getParent(); |
8772 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
8773 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
8774 | DebugLoc DL = MI.getDebugLoc(); |
8775 | |
8776 | Register Dest = MI.getOperand(i: 0).getReg(); |
8777 | Register Hi = MI.getOperand(i: 1).getReg(); |
8778 | Register Lo = MI.getOperand(i: 2).getReg(); |
8779 | Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); |
8780 | Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); |
8781 | |
8782 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1); |
8783 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2) |
8784 | .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64); |
8785 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) |
8786 | .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64); |
8787 | |
8788 | MI.eraseFromParent(); |
8789 | return MBB; |
8790 | } |
8791 | |
8792 | // Emit an extension from a GR64 to a GR128. ClearEven is true |
8793 | // if the high register of the GR128 value must be cleared or false if |
8794 | // it's "don't care". |
8795 | MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, |
8796 | MachineBasicBlock *MBB, |
8797 | bool ClearEven) const { |
8798 | MachineFunction &MF = *MBB->getParent(); |
8799 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
8800 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
8801 | DebugLoc DL = MI.getDebugLoc(); |
8802 | |
8803 | Register Dest = MI.getOperand(i: 0).getReg(); |
8804 | Register Src = MI.getOperand(i: 1).getReg(); |
8805 | Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); |
8806 | |
8807 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); |
8808 | if (ClearEven) { |
8809 | Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); |
8810 | Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); |
8811 | |
8812 | BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) |
8813 | .addImm(0); |
8814 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) |
8815 | .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); |
8816 | In128 = NewIn128; |
8817 | } |
8818 | BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) |
8819 | .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); |
8820 | |
8821 | MI.eraseFromParent(); |
8822 | return MBB; |
8823 | } |
8824 | |
8825 | MachineBasicBlock * |
8826 | SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, |
8827 | MachineBasicBlock *MBB, |
8828 | unsigned Opcode, bool IsMemset) const { |
8829 | MachineFunction &MF = *MBB->getParent(); |
8830 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
8831 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
8832 | DebugLoc DL = MI.getDebugLoc(); |
8833 | |
8834 | MachineOperand DestBase = earlyUseOperand(Op: MI.getOperand(i: 0)); |
8835 | uint64_t DestDisp = MI.getOperand(i: 1).getImm(); |
8836 | MachineOperand SrcBase = MachineOperand::CreateReg(Reg: 0U, isDef: false); |
8837 | uint64_t SrcDisp; |
8838 | |
8839 | // Fold the displacement Disp if it is out of range. |
8840 | auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { |
8841 | if (!isUInt<12>(x: Disp)) { |
8842 | Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
8843 | unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); |
8844 | BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) |
8845 | .add(Base).addImm(Disp).addReg(0); |
8846 | Base = MachineOperand::CreateReg(Reg, isDef: false); |
8847 | Disp = 0; |
8848 | } |
8849 | }; |
8850 | |
8851 | if (!IsMemset) { |
8852 | SrcBase = earlyUseOperand(Op: MI.getOperand(i: 2)); |
8853 | SrcDisp = MI.getOperand(i: 3).getImm(); |
8854 | } else { |
8855 | SrcBase = DestBase; |
8856 | SrcDisp = DestDisp++; |
8857 | foldDisplIfNeeded(DestBase, DestDisp); |
8858 | } |
8859 | |
8860 | MachineOperand &LengthMO = MI.getOperand(i: IsMemset ? 2 : 4); |
8861 | bool IsImmForm = LengthMO.isImm(); |
8862 | bool IsRegForm = !IsImmForm; |
8863 | |
8864 | // Build and insert one Opcode of Length, with special treatment for memset. |
8865 | auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, |
8866 | MachineBasicBlock::iterator InsPos, |
8867 | MachineOperand DBase, uint64_t DDisp, |
8868 | MachineOperand SBase, uint64_t SDisp, |
8869 | unsigned Length) -> void { |
8870 | assert(Length > 0 && Length <= 256 && "Building memory op with bad length." ); |
8871 | if (IsMemset) { |
8872 | MachineOperand ByteMO = earlyUseOperand(Op: MI.getOperand(i: 3)); |
8873 | if (ByteMO.isImm()) |
8874 | BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) |
8875 | .add(SBase).addImm(SDisp).add(ByteMO); |
8876 | else |
8877 | BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) |
8878 | .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); |
8879 | if (--Length == 0) |
8880 | return; |
8881 | } |
8882 | BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) |
8883 | .add(DBase).addImm(DDisp).addImm(Length) |
8884 | .add(SBase).addImm(SDisp) |
8885 | .setMemRefs(MI.memoperands()); |
8886 | }; |
8887 | |
8888 | bool NeedsLoop = false; |
8889 | uint64_t ImmLength = 0; |
8890 | Register LenAdjReg = SystemZ::NoRegister; |
8891 | if (IsImmForm) { |
8892 | ImmLength = LengthMO.getImm(); |
8893 | ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. |
8894 | if (ImmLength == 0) { |
8895 | MI.eraseFromParent(); |
8896 | return MBB; |
8897 | } |
8898 | if (Opcode == SystemZ::CLC) { |
8899 | if (ImmLength > 3 * 256) |
8900 | // A two-CLC sequence is a clear win over a loop, not least because |
8901 | // it needs only one branch. A three-CLC sequence needs the same |
8902 | // number of branches as a loop (i.e. 2), but is shorter. That |
8903 | // brings us to lengths greater than 768 bytes. It seems relatively |
8904 | // likely that a difference will be found within the first 768 bytes, |
8905 | // so we just optimize for the smallest number of branch |
8906 | // instructions, in order to avoid polluting the prediction buffer |
8907 | // too much. |
8908 | NeedsLoop = true; |
8909 | } else if (ImmLength > 6 * 256) |
8910 | // The heuristic we use is to prefer loops for anything that would |
8911 | // require 7 or more MVCs. With these kinds of sizes there isn't much |
8912 | // to choose between straight-line code and looping code, since the |
8913 | // time will be dominated by the MVCs themselves. |
8914 | NeedsLoop = true; |
8915 | } else { |
8916 | NeedsLoop = true; |
8917 | LenAdjReg = LengthMO.getReg(); |
8918 | } |
8919 | |
8920 | // When generating more than one CLC, all but the last will need to |
8921 | // branch to the end when a difference is found. |
8922 | MachineBasicBlock *EndMBB = |
8923 | (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) |
8924 | ? SystemZ::splitBlockAfter(MI, MBB) |
8925 | : nullptr); |
8926 | |
8927 | if (NeedsLoop) { |
8928 | Register StartCountReg = |
8929 | MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); |
8930 | if (IsImmForm) { |
8931 | TII->loadImmediate(MBB&: *MBB, MBBI: MI, Reg: StartCountReg, Value: ImmLength / 256); |
8932 | ImmLength &= 255; |
8933 | } else { |
8934 | BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) |
8935 | .addReg(LenAdjReg) |
8936 | .addReg(0) |
8937 | .addImm(8); |
8938 | } |
8939 | |
8940 | bool HaveSingleBase = DestBase.isIdenticalTo(Other: SrcBase); |
8941 | auto loadZeroAddress = [&]() -> MachineOperand { |
8942 | Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
8943 | BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); |
8944 | return MachineOperand::CreateReg(Reg, isDef: false); |
8945 | }; |
8946 | if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) |
8947 | DestBase = loadZeroAddress(); |
8948 | if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) |
8949 | SrcBase = HaveSingleBase ? DestBase : loadZeroAddress(); |
8950 | |
8951 | MachineBasicBlock *StartMBB = nullptr; |
8952 | MachineBasicBlock *LoopMBB = nullptr; |
8953 | MachineBasicBlock *NextMBB = nullptr; |
8954 | MachineBasicBlock *DoneMBB = nullptr; |
8955 | MachineBasicBlock *AllDoneMBB = nullptr; |
8956 | |
8957 | Register StartSrcReg = forceReg(MI, Base&: SrcBase, TII); |
8958 | Register StartDestReg = |
8959 | (HaveSingleBase ? StartSrcReg : forceReg(MI, Base&: DestBase, TII)); |
8960 | |
8961 | const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; |
8962 | Register ThisSrcReg = MRI.createVirtualRegister(RegClass: RC); |
8963 | Register ThisDestReg = |
8964 | (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RegClass: RC)); |
8965 | Register NextSrcReg = MRI.createVirtualRegister(RegClass: RC); |
8966 | Register NextDestReg = |
8967 | (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RegClass: RC)); |
8968 | RC = &SystemZ::GR64BitRegClass; |
8969 | Register ThisCountReg = MRI.createVirtualRegister(RegClass: RC); |
8970 | Register NextCountReg = MRI.createVirtualRegister(RegClass: RC); |
8971 | |
8972 | if (IsRegForm) { |
8973 | AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
8974 | StartMBB = SystemZ::emitBlockAfter(MBB); |
8975 | LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
8976 | NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB); |
8977 | DoneMBB = SystemZ::emitBlockAfter(MBB: NextMBB); |
8978 | |
8979 | // MBB: |
8980 | // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB. |
8981 | BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) |
8982 | .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1); |
8983 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
8984 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) |
8985 | .addMBB(AllDoneMBB); |
8986 | MBB->addSuccessor(Succ: AllDoneMBB); |
8987 | if (!IsMemset) |
8988 | MBB->addSuccessor(Succ: StartMBB); |
8989 | else { |
8990 | // MemsetOneCheckMBB: |
8991 | // # Jump to MemsetOneMBB for a memset of length 1, or |
8992 | // # fall thru to StartMBB. |
8993 | MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); |
8994 | MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(MBB: &*MF.rbegin()); |
8995 | MBB->addSuccessor(Succ: MemsetOneCheckMBB); |
8996 | MBB = MemsetOneCheckMBB; |
8997 | BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) |
8998 | .addReg(LenAdjReg).addImm(-1); |
8999 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9000 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) |
9001 | .addMBB(MemsetOneMBB); |
9002 | MBB->addSuccessor(Succ: MemsetOneMBB, Prob: {10, 100}); |
9003 | MBB->addSuccessor(Succ: StartMBB, Prob: {90, 100}); |
9004 | |
9005 | // MemsetOneMBB: |
9006 | // # Jump back to AllDoneMBB after a single MVI or STC. |
9007 | MBB = MemsetOneMBB; |
9008 | insertMemMemOp(MBB, MBB->end(), |
9009 | MachineOperand::CreateReg(Reg: StartDestReg, isDef: false), DestDisp, |
9010 | MachineOperand::CreateReg(Reg: StartSrcReg, isDef: false), SrcDisp, |
9011 | 1); |
9012 | BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB); |
9013 | MBB->addSuccessor(Succ: AllDoneMBB); |
9014 | } |
9015 | |
9016 | // StartMBB: |
9017 | // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. |
9018 | MBB = StartMBB; |
9019 | BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) |
9020 | .addReg(StartCountReg).addImm(0); |
9021 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9022 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) |
9023 | .addMBB(DoneMBB); |
9024 | MBB->addSuccessor(Succ: DoneMBB); |
9025 | MBB->addSuccessor(Succ: LoopMBB); |
9026 | } |
9027 | else { |
9028 | StartMBB = MBB; |
9029 | DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
9030 | LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9031 | NextMBB = (EndMBB ? SystemZ::emitBlockAfter(MBB: LoopMBB) : LoopMBB); |
9032 | |
9033 | // StartMBB: |
9034 | // # fall through to LoopMBB |
9035 | MBB->addSuccessor(Succ: LoopMBB); |
9036 | |
9037 | DestBase = MachineOperand::CreateReg(Reg: NextDestReg, isDef: false); |
9038 | SrcBase = MachineOperand::CreateReg(Reg: NextSrcReg, isDef: false); |
9039 | if (EndMBB && !ImmLength) |
9040 | // If the loop handled the whole CLC range, DoneMBB will be empty with |
9041 | // CC live-through into EndMBB, so add it as live-in. |
9042 | DoneMBB->addLiveIn(SystemZ::CC); |
9043 | } |
9044 | |
9045 | // LoopMBB: |
9046 | // %ThisDestReg = phi [ %StartDestReg, StartMBB ], |
9047 | // [ %NextDestReg, NextMBB ] |
9048 | // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], |
9049 | // [ %NextSrcReg, NextMBB ] |
9050 | // %ThisCountReg = phi [ %StartCountReg, StartMBB ], |
9051 | // [ %NextCountReg, NextMBB ] |
9052 | // ( PFD 2, 768+DestDisp(%ThisDestReg) ) |
9053 | // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) |
9054 | // ( JLH EndMBB ) |
9055 | // |
9056 | // The prefetch is used only for MVC. The JLH is used only for CLC. |
9057 | MBB = LoopMBB; |
9058 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) |
9059 | .addReg(StartDestReg).addMBB(StartMBB) |
9060 | .addReg(NextDestReg).addMBB(NextMBB); |
9061 | if (!HaveSingleBase) |
9062 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) |
9063 | .addReg(StartSrcReg).addMBB(StartMBB) |
9064 | .addReg(NextSrcReg).addMBB(NextMBB); |
9065 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) |
9066 | .addReg(StartCountReg).addMBB(StartMBB) |
9067 | .addReg(NextCountReg).addMBB(NextMBB); |
9068 | if (Opcode == SystemZ::MVC) |
9069 | BuildMI(MBB, DL, TII->get(SystemZ::PFD)) |
9070 | .addImm(SystemZ::PFD_WRITE) |
9071 | .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0); |
9072 | insertMemMemOp(MBB, MBB->end(), |
9073 | MachineOperand::CreateReg(Reg: ThisDestReg, isDef: false), DestDisp, |
9074 | MachineOperand::CreateReg(Reg: ThisSrcReg, isDef: false), SrcDisp, 256); |
9075 | if (EndMBB) { |
9076 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9077 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) |
9078 | .addMBB(EndMBB); |
9079 | MBB->addSuccessor(Succ: EndMBB); |
9080 | MBB->addSuccessor(Succ: NextMBB); |
9081 | } |
9082 | |
9083 | // NextMBB: |
9084 | // %NextDestReg = LA 256(%ThisDestReg) |
9085 | // %NextSrcReg = LA 256(%ThisSrcReg) |
9086 | // %NextCountReg = AGHI %ThisCountReg, -1 |
9087 | // CGHI %NextCountReg, 0 |
9088 | // JLH LoopMBB |
9089 | // # fall through to DoneMBB |
9090 | // |
9091 | // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. |
9092 | MBB = NextMBB; |
9093 | BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) |
9094 | .addReg(ThisDestReg).addImm(256).addReg(0); |
9095 | if (!HaveSingleBase) |
9096 | BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) |
9097 | .addReg(ThisSrcReg).addImm(256).addReg(0); |
9098 | BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) |
9099 | .addReg(ThisCountReg).addImm(-1); |
9100 | BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) |
9101 | .addReg(NextCountReg).addImm(0); |
9102 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9103 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) |
9104 | .addMBB(LoopMBB); |
9105 | MBB->addSuccessor(Succ: LoopMBB); |
9106 | MBB->addSuccessor(Succ: DoneMBB); |
9107 | |
9108 | MBB = DoneMBB; |
9109 | if (IsRegForm) { |
9110 | // DoneMBB: |
9111 | // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run. |
9112 | // # Use EXecute Relative Long for the remainder of the bytes. The target |
9113 | // instruction of the EXRL will have a length field of 1 since 0 is an |
9114 | // illegal value. The number of bytes processed becomes (%LenAdjReg & |
9115 | // 0xff) + 1. |
9116 | // # Fall through to AllDoneMBB. |
9117 | Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
9118 | Register RemDestReg = HaveSingleBase ? RemSrcReg |
9119 | : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
9120 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg) |
9121 | .addReg(StartDestReg).addMBB(StartMBB) |
9122 | .addReg(NextDestReg).addMBB(NextMBB); |
9123 | if (!HaveSingleBase) |
9124 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg) |
9125 | .addReg(StartSrcReg).addMBB(StartMBB) |
9126 | .addReg(NextSrcReg).addMBB(NextMBB); |
9127 | if (IsMemset) |
9128 | insertMemMemOp(MBB, MBB->end(), |
9129 | MachineOperand::CreateReg(Reg: RemDestReg, isDef: false), DestDisp, |
9130 | MachineOperand::CreateReg(Reg: RemSrcReg, isDef: false), SrcDisp, 1); |
9131 | MachineInstrBuilder EXRL_MIB = |
9132 | BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo)) |
9133 | .addImm(Opcode) |
9134 | .addReg(LenAdjReg) |
9135 | .addReg(RemDestReg).addImm(DestDisp) |
9136 | .addReg(RemSrcReg).addImm(SrcDisp); |
9137 | MBB->addSuccessor(Succ: AllDoneMBB); |
9138 | MBB = AllDoneMBB; |
9139 | if (Opcode != SystemZ::MVC) { |
9140 | EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine); |
9141 | if (EndMBB) |
9142 | MBB->addLiveIn(SystemZ::CC); |
9143 | } |
9144 | } |
9145 | MF.getProperties().reset(P: MachineFunctionProperties::Property::NoPHIs); |
9146 | } |
9147 | |
9148 | // Handle any remaining bytes with straight-line code. |
9149 | while (ImmLength > 0) { |
9150 | uint64_t ThisLength = std::min(a: ImmLength, b: uint64_t(256)); |
9151 | // The previous iteration might have created out-of-range displacements. |
9152 | // Apply them using LA/LAY if so. |
9153 | foldDisplIfNeeded(DestBase, DestDisp); |
9154 | foldDisplIfNeeded(SrcBase, SrcDisp); |
9155 | insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength); |
9156 | DestDisp += ThisLength; |
9157 | SrcDisp += ThisLength; |
9158 | ImmLength -= ThisLength; |
9159 | // If there's another CLC to go, branch to the end if a difference |
9160 | // was found. |
9161 | if (EndMBB && ImmLength > 0) { |
9162 | MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); |
9163 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9164 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) |
9165 | .addMBB(EndMBB); |
9166 | MBB->addSuccessor(Succ: EndMBB); |
9167 | MBB->addSuccessor(Succ: NextMBB); |
9168 | MBB = NextMBB; |
9169 | } |
9170 | } |
9171 | if (EndMBB) { |
9172 | MBB->addSuccessor(Succ: EndMBB); |
9173 | MBB = EndMBB; |
9174 | MBB->addLiveIn(SystemZ::CC); |
9175 | } |
9176 | |
9177 | MI.eraseFromParent(); |
9178 | return MBB; |
9179 | } |
9180 | |
9181 | // Decompose string pseudo-instruction MI into a loop that continually performs |
9182 | // Opcode until CC != 3. |
9183 | MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( |
9184 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { |
9185 | MachineFunction &MF = *MBB->getParent(); |
9186 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9187 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
9188 | DebugLoc DL = MI.getDebugLoc(); |
9189 | |
9190 | uint64_t End1Reg = MI.getOperand(i: 0).getReg(); |
9191 | uint64_t Start1Reg = MI.getOperand(i: 1).getReg(); |
9192 | uint64_t Start2Reg = MI.getOperand(i: 2).getReg(); |
9193 | uint64_t CharReg = MI.getOperand(i: 3).getReg(); |
9194 | |
9195 | const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; |
9196 | uint64_t This1Reg = MRI.createVirtualRegister(RegClass: RC); |
9197 | uint64_t This2Reg = MRI.createVirtualRegister(RegClass: RC); |
9198 | uint64_t End2Reg = MRI.createVirtualRegister(RegClass: RC); |
9199 | |
9200 | MachineBasicBlock *StartMBB = MBB; |
9201 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); |
9202 | MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9203 | |
9204 | // StartMBB: |
9205 | // # fall through to LoopMBB |
9206 | MBB->addSuccessor(Succ: LoopMBB); |
9207 | |
9208 | // LoopMBB: |
9209 | // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] |
9210 | // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] |
9211 | // R0L = %CharReg |
9212 | // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L |
9213 | // JO LoopMBB |
9214 | // # fall through to DoneMBB |
9215 | // |
9216 | // The load of R0L can be hoisted by post-RA LICM. |
9217 | MBB = LoopMBB; |
9218 | |
9219 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) |
9220 | .addReg(Start1Reg).addMBB(StartMBB) |
9221 | .addReg(End1Reg).addMBB(LoopMBB); |
9222 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) |
9223 | .addReg(Start2Reg).addMBB(StartMBB) |
9224 | .addReg(End2Reg).addMBB(LoopMBB); |
9225 | BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); |
9226 | BuildMI(MBB, DL, TII->get(Opcode)) |
9227 | .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) |
9228 | .addReg(This1Reg).addReg(This2Reg); |
9229 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9230 | .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); |
9231 | MBB->addSuccessor(Succ: LoopMBB); |
9232 | MBB->addSuccessor(Succ: DoneMBB); |
9233 | |
9234 | DoneMBB->addLiveIn(SystemZ::CC); |
9235 | |
9236 | MI.eraseFromParent(); |
9237 | return DoneMBB; |
9238 | } |
9239 | |
9240 | // Update TBEGIN instruction with final opcode and register clobbers. |
9241 | MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( |
9242 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, |
9243 | bool NoFloat) const { |
9244 | MachineFunction &MF = *MBB->getParent(); |
9245 | const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); |
9246 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9247 | |
9248 | // Update opcode. |
9249 | MI.setDesc(TII->get(Opcode)); |
9250 | |
9251 | // We cannot handle a TBEGIN that clobbers the stack or frame pointer. |
9252 | // Make sure to add the corresponding GRSM bits if they are missing. |
9253 | uint64_t Control = MI.getOperand(i: 2).getImm(); |
9254 | static const unsigned GPRControlBit[16] = { |
9255 | 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, |
9256 | 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 |
9257 | }; |
9258 | Control |= GPRControlBit[15]; |
9259 | if (TFI->hasFP(MF)) |
9260 | Control |= GPRControlBit[11]; |
9261 | MI.getOperand(i: 2).setImm(Control); |
9262 | |
9263 | // Add GPR clobbers. |
9264 | for (int I = 0; I < 16; I++) { |
9265 | if ((Control & GPRControlBit[I]) == 0) { |
9266 | unsigned Reg = SystemZMC::GR64Regs[I]; |
9267 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
9268 | } |
9269 | } |
9270 | |
9271 | // Add FPR/VR clobbers. |
9272 | if (!NoFloat && (Control & 4) != 0) { |
9273 | if (Subtarget.hasVector()) { |
9274 | for (unsigned Reg : SystemZMC::VR128Regs) { |
9275 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
9276 | } |
9277 | } else { |
9278 | for (unsigned Reg : SystemZMC::FP64Regs) { |
9279 | MI.addOperand(Op: MachineOperand::CreateReg(Reg, isDef: true, isImp: true)); |
9280 | } |
9281 | } |
9282 | } |
9283 | |
9284 | return MBB; |
9285 | } |
9286 | |
9287 | MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( |
9288 | MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { |
9289 | MachineFunction &MF = *MBB->getParent(); |
9290 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
9291 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9292 | DebugLoc DL = MI.getDebugLoc(); |
9293 | |
9294 | Register SrcReg = MI.getOperand(i: 0).getReg(); |
9295 | |
9296 | // Create new virtual register of the same class as source. |
9297 | const TargetRegisterClass *RC = MRI->getRegClass(Reg: SrcReg); |
9298 | Register DstReg = MRI->createVirtualRegister(RegClass: RC); |
9299 | |
9300 | // Replace pseudo with a normal load-and-test that models the def as |
9301 | // well. |
9302 | BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) |
9303 | .addReg(SrcReg) |
9304 | .setMIFlags(MI.getFlags()); |
9305 | MI.eraseFromParent(); |
9306 | |
9307 | return MBB; |
9308 | } |
9309 | |
9310 | MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( |
9311 | MachineInstr &MI, MachineBasicBlock *MBB) const { |
9312 | MachineFunction &MF = *MBB->getParent(); |
9313 | MachineRegisterInfo *MRI = &MF.getRegInfo(); |
9314 | const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); |
9315 | DebugLoc DL = MI.getDebugLoc(); |
9316 | const unsigned ProbeSize = getStackProbeSize(MF); |
9317 | Register DstReg = MI.getOperand(i: 0).getReg(); |
9318 | Register SizeReg = MI.getOperand(i: 2).getReg(); |
9319 | |
9320 | MachineBasicBlock *StartMBB = MBB; |
9321 | MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); |
9322 | MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(MBB: StartMBB); |
9323 | MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(MBB: LoopTestMBB); |
9324 | MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(MBB: LoopBodyMBB); |
9325 | MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(MBB: TailTestMBB); |
9326 | |
9327 | MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(PtrInfo: MachinePointerInfo(), |
9328 | F: MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, Size: 8, BaseAlignment: Align(1)); |
9329 | |
9330 | Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
9331 | Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); |
9332 | |
9333 | // LoopTestMBB |
9334 | // BRC TailTestMBB |
9335 | // # fallthrough to LoopBodyMBB |
9336 | StartMBB->addSuccessor(Succ: LoopTestMBB); |
9337 | MBB = LoopTestMBB; |
9338 | BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) |
9339 | .addReg(SizeReg) |
9340 | .addMBB(StartMBB) |
9341 | .addReg(IncReg) |
9342 | .addMBB(LoopBodyMBB); |
9343 | BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) |
9344 | .addReg(PHIReg) |
9345 | .addImm(ProbeSize); |
9346 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9347 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) |
9348 | .addMBB(TailTestMBB); |
9349 | MBB->addSuccessor(Succ: LoopBodyMBB); |
9350 | MBB->addSuccessor(Succ: TailTestMBB); |
9351 | |
9352 | // LoopBodyMBB: Allocate and probe by means of a volatile compare. |
9353 | // J LoopTestMBB |
9354 | MBB = LoopBodyMBB; |
9355 | BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) |
9356 | .addReg(PHIReg) |
9357 | .addImm(ProbeSize); |
9358 | BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) |
9359 | .addReg(SystemZ::R15D) |
9360 | .addImm(ProbeSize); |
9361 | BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) |
9362 | .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) |
9363 | .setMemRefs(VolLdMMO); |
9364 | BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); |
9365 | MBB->addSuccessor(Succ: LoopTestMBB); |
9366 | |
9367 | // TailTestMBB |
9368 | // BRC DoneMBB |
9369 | // # fallthrough to TailMBB |
9370 | MBB = TailTestMBB; |
9371 | BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) |
9372 | .addReg(PHIReg) |
9373 | .addImm(0); |
9374 | BuildMI(MBB, DL, TII->get(SystemZ::BRC)) |
9375 | .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) |
9376 | .addMBB(DoneMBB); |
9377 | MBB->addSuccessor(Succ: TailMBB); |
9378 | MBB->addSuccessor(Succ: DoneMBB); |
9379 | |
9380 | // TailMBB |
9381 | // # fallthrough to DoneMBB |
9382 | MBB = TailMBB; |
9383 | BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) |
9384 | .addReg(SystemZ::R15D) |
9385 | .addReg(PHIReg); |
9386 | BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) |
9387 | .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) |
9388 | .setMemRefs(VolLdMMO); |
9389 | MBB->addSuccessor(Succ: DoneMBB); |
9390 | |
9391 | // DoneMBB |
9392 | MBB = DoneMBB; |
9393 | BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) |
9394 | .addReg(SystemZ::R15D); |
9395 | |
9396 | MI.eraseFromParent(); |
9397 | return DoneMBB; |
9398 | } |
9399 | |
9400 | SDValue SystemZTargetLowering:: |
9401 | getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { |
9402 | MachineFunction &MF = DAG.getMachineFunction(); |
9403 | auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); |
9404 | SDLoc DL(SP); |
9405 | return DAG.getNode(ISD::ADD, DL, MVT::i64, SP, |
9406 | DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL)); |
9407 | } |
9408 | |
9409 | MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( |
9410 | MachineInstr &MI, MachineBasicBlock *MBB) const { |
9411 | switch (MI.getOpcode()) { |
9412 | case SystemZ::ADJCALLSTACKDOWN: |
9413 | case SystemZ::ADJCALLSTACKUP: |
9414 | return emitAdjCallStack(MI, BB: MBB); |
9415 | |
9416 | case SystemZ::Select32: |
9417 | case SystemZ::Select64: |
9418 | case SystemZ::Select128: |
9419 | case SystemZ::SelectF32: |
9420 | case SystemZ::SelectF64: |
9421 | case SystemZ::SelectF128: |
9422 | case SystemZ::SelectVR32: |
9423 | case SystemZ::SelectVR64: |
9424 | case SystemZ::SelectVR128: |
9425 | return emitSelect(MI, MBB); |
9426 | |
9427 | case SystemZ::CondStore8Mux: |
9428 | return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); |
9429 | case SystemZ::CondStore8MuxInv: |
9430 | return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); |
9431 | case SystemZ::CondStore16Mux: |
9432 | return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); |
9433 | case SystemZ::CondStore16MuxInv: |
9434 | return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); |
9435 | case SystemZ::CondStore32Mux: |
9436 | return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); |
9437 | case SystemZ::CondStore32MuxInv: |
9438 | return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); |
9439 | case SystemZ::CondStore8: |
9440 | return emitCondStore(MI, MBB, SystemZ::STC, 0, false); |
9441 | case SystemZ::CondStore8Inv: |
9442 | return emitCondStore(MI, MBB, SystemZ::STC, 0, true); |
9443 | case SystemZ::CondStore16: |
9444 | return emitCondStore(MI, MBB, SystemZ::STH, 0, false); |
9445 | case SystemZ::CondStore16Inv: |
9446 | return emitCondStore(MI, MBB, SystemZ::STH, 0, true); |
9447 | case SystemZ::CondStore32: |
9448 | return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); |
9449 | case SystemZ::CondStore32Inv: |
9450 | return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); |
9451 | case SystemZ::CondStore64: |
9452 | return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); |
9453 | case SystemZ::CondStore64Inv: |
9454 | return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); |
9455 | case SystemZ::CondStoreF32: |
9456 | return emitCondStore(MI, MBB, SystemZ::STE, 0, false); |
9457 | case SystemZ::CondStoreF32Inv: |
9458 | return emitCondStore(MI, MBB, SystemZ::STE, 0, true); |
9459 | case SystemZ::CondStoreF64: |
9460 | return emitCondStore(MI, MBB, SystemZ::STD, 0, false); |
9461 | case SystemZ::CondStoreF64Inv: |
9462 | return emitCondStore(MI, MBB, SystemZ::STD, 0, true); |
9463 | |
9464 | case SystemZ::SCmp128Hi: |
9465 | return emitICmp128Hi(MI, MBB, Unsigned: false); |
9466 | case SystemZ::UCmp128Hi: |
9467 | return emitICmp128Hi(MI, MBB, Unsigned: true); |
9468 | |
9469 | case SystemZ::PAIR128: |
9470 | return emitPair128(MI, MBB); |
9471 | case SystemZ::AEXT128: |
9472 | return emitExt128(MI, MBB, ClearEven: false); |
9473 | case SystemZ::ZEXT128: |
9474 | return emitExt128(MI, MBB, ClearEven: true); |
9475 | |
9476 | case SystemZ::ATOMIC_SWAPW: |
9477 | return emitAtomicLoadBinary(MI, MBB, BinOpcode: 0); |
9478 | |
9479 | case SystemZ::ATOMIC_LOADW_AR: |
9480 | return emitAtomicLoadBinary(MI, MBB, SystemZ::AR); |
9481 | case SystemZ::ATOMIC_LOADW_AFI: |
9482 | return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI); |
9483 | |
9484 | case SystemZ::ATOMIC_LOADW_SR: |
9485 | return emitAtomicLoadBinary(MI, MBB, SystemZ::SR); |
9486 | |
9487 | case SystemZ::ATOMIC_LOADW_NR: |
9488 | return emitAtomicLoadBinary(MI, MBB, SystemZ::NR); |
9489 | case SystemZ::ATOMIC_LOADW_NILH: |
9490 | return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH); |
9491 | |
9492 | case SystemZ::ATOMIC_LOADW_OR: |
9493 | return emitAtomicLoadBinary(MI, MBB, SystemZ::OR); |
9494 | case SystemZ::ATOMIC_LOADW_OILH: |
9495 | return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH); |
9496 | |
9497 | case SystemZ::ATOMIC_LOADW_XR: |
9498 | return emitAtomicLoadBinary(MI, MBB, SystemZ::XR); |
9499 | case SystemZ::ATOMIC_LOADW_XILF: |
9500 | return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF); |
9501 | |
9502 | case SystemZ::ATOMIC_LOADW_NRi: |
9503 | return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true); |
9504 | case SystemZ::ATOMIC_LOADW_NILHi: |
9505 | return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true); |
9506 | |
9507 | case SystemZ::ATOMIC_LOADW_MIN: |
9508 | return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE); |
9509 | case SystemZ::ATOMIC_LOADW_MAX: |
9510 | return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE); |
9511 | case SystemZ::ATOMIC_LOADW_UMIN: |
9512 | return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE); |
9513 | case SystemZ::ATOMIC_LOADW_UMAX: |
9514 | return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE); |
9515 | |
9516 | case SystemZ::ATOMIC_CMP_SWAPW: |
9517 | return emitAtomicCmpSwapW(MI, MBB); |
9518 | case SystemZ::MVCImm: |
9519 | case SystemZ::MVCReg: |
9520 | return emitMemMemWrapper(MI, MBB, SystemZ::MVC); |
9521 | case SystemZ::NCImm: |
9522 | return emitMemMemWrapper(MI, MBB, SystemZ::NC); |
9523 | case SystemZ::OCImm: |
9524 | return emitMemMemWrapper(MI, MBB, SystemZ::OC); |
9525 | case SystemZ::XCImm: |
9526 | case SystemZ::XCReg: |
9527 | return emitMemMemWrapper(MI, MBB, SystemZ::XC); |
9528 | case SystemZ::CLCImm: |
9529 | case SystemZ::CLCReg: |
9530 | return emitMemMemWrapper(MI, MBB, SystemZ::CLC); |
9531 | case SystemZ::MemsetImmImm: |
9532 | case SystemZ::MemsetImmReg: |
9533 | case SystemZ::MemsetRegImm: |
9534 | case SystemZ::MemsetRegReg: |
9535 | return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/); |
9536 | case SystemZ::CLSTLoop: |
9537 | return emitStringWrapper(MI, MBB, SystemZ::CLST); |
9538 | case SystemZ::MVSTLoop: |
9539 | return emitStringWrapper(MI, MBB, SystemZ::MVST); |
9540 | case SystemZ::SRSTLoop: |
9541 | return emitStringWrapper(MI, MBB, SystemZ::SRST); |
9542 | case SystemZ::TBEGIN: |
9543 | return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); |
9544 | case SystemZ::TBEGIN_nofloat: |
9545 | return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); |
9546 | case SystemZ::TBEGINC: |
9547 | return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); |
9548 | case SystemZ::LTEBRCompare_Pseudo: |
9549 | return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); |
9550 | case SystemZ::LTDBRCompare_Pseudo: |
9551 | return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); |
9552 | case SystemZ::LTXBRCompare_Pseudo: |
9553 | return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); |
9554 | |
9555 | case SystemZ::PROBED_ALLOCA: |
9556 | return emitProbedAlloca(MI, MBB); |
9557 | |
9558 | case TargetOpcode::STACKMAP: |
9559 | case TargetOpcode::PATCHPOINT: |
9560 | return emitPatchPoint(MI, MBB); |
9561 | |
9562 | default: |
9563 | llvm_unreachable("Unexpected instr type to insert" ); |
9564 | } |
9565 | } |
9566 | |
9567 | // This is only used by the isel schedulers, and is needed only to prevent |
9568 | // compiler from crashing when list-ilp is used. |
9569 | const TargetRegisterClass * |
9570 | SystemZTargetLowering::getRepRegClassFor(MVT VT) const { |
9571 | if (VT == MVT::Untyped) |
9572 | return &SystemZ::ADDR128BitRegClass; |
9573 | return TargetLowering::getRepRegClassFor(VT); |
9574 | } |
9575 | |
9576 | SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op, |
9577 | SelectionDAG &DAG) const { |
9578 | SDLoc dl(Op); |
9579 | /* |
9580 | The rounding method is in FPC Byte 3 bits 6-7, and has the following |
9581 | settings: |
9582 | 00 Round to nearest |
9583 | 01 Round to 0 |
9584 | 10 Round to +inf |
9585 | 11 Round to -inf |
9586 | |
9587 | FLT_ROUNDS, on the other hand, expects the following: |
9588 | -1 Undefined |
9589 | 0 Round to 0 |
9590 | 1 Round to nearest |
9591 | 2 Round to +inf |
9592 | 3 Round to -inf |
9593 | */ |
9594 | |
9595 | // Save FPC to register. |
9596 | SDValue Chain = Op.getOperand(i: 0); |
9597 | SDValue EFPC( |
9598 | DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0); |
9599 | Chain = EFPC.getValue(R: 1); |
9600 | |
9601 | // Transform as necessary |
9602 | SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC, |
9603 | DAG.getConstant(3, dl, MVT::i32)); |
9604 | // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1 |
9605 | SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, |
9606 | DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1, |
9607 | DAG.getConstant(1, dl, MVT::i32))); |
9608 | |
9609 | SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2, |
9610 | DAG.getConstant(1, dl, MVT::i32)); |
9611 | RetVal = DAG.getZExtOrTrunc(Op: RetVal, DL: dl, VT: Op.getValueType()); |
9612 | |
9613 | return DAG.getMergeValues(Ops: {RetVal, Chain}, dl); |
9614 | } |
9615 | |
9616 | SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, |
9617 | SelectionDAG &DAG) const { |
9618 | EVT VT = Op.getValueType(); |
9619 | Op = Op.getOperand(i: 0); |
9620 | EVT OpVT = Op.getValueType(); |
9621 | |
9622 | assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector." ); |
9623 | |
9624 | SDLoc DL(Op); |
9625 | |
9626 | // load a 0 vector for the third operand of VSUM. |
9627 | SDValue Zero = DAG.getSplatBuildVector(VT: OpVT, DL, Op: DAG.getConstant(Val: 0, DL, VT)); |
9628 | |
9629 | // execute VSUM. |
9630 | switch (OpVT.getScalarSizeInBits()) { |
9631 | case 8: |
9632 | case 16: |
9633 | Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); |
9634 | LLVM_FALLTHROUGH; |
9635 | case 32: |
9636 | case 64: |
9637 | Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, |
9638 | DAG.getBitcast(Op.getValueType(), Zero)); |
9639 | break; |
9640 | case 128: |
9641 | break; // VSUM over v1i128 should not happen and would be a noop |
9642 | default: |
9643 | llvm_unreachable("Unexpected scalar size." ); |
9644 | } |
9645 | // Cast to original vector type, retrieve last element. |
9646 | return DAG.getNode( |
9647 | ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op), |
9648 | DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32)); |
9649 | } |
9650 | |