1 | //===-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ --===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines an instruction selector for the SystemZ target. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "SystemZTargetMachine.h" |
14 | #include "SystemZISelLowering.h" |
15 | #include "llvm/Analysis/AliasAnalysis.h" |
16 | #include "llvm/CodeGen/SelectionDAGISel.h" |
17 | #include "llvm/Support/Debug.h" |
18 | #include "llvm/Support/KnownBits.h" |
19 | #include "llvm/Support/raw_ostream.h" |
20 | |
21 | using namespace llvm; |
22 | |
23 | #define DEBUG_TYPE "systemz-isel" |
24 | #define PASS_NAME "SystemZ DAG->DAG Pattern Instruction Selection" |
25 | |
26 | namespace { |
// Used to build addressing modes.
struct SystemZAddressingMode {
  // The shape of the address.
  enum AddrForm {
    // base+displacement
    FormBD,

    // base+displacement+index for load and store operands
    FormBDXNormal,

    // base+displacement+index for load address operands
    FormBDXLA,

    // base+displacement+index+ADJDYNALLOC
    FormBDXDynAlloc
  };
  AddrForm Form;

  // The type of displacement.  The enum names here correspond directly
  // to the definitions in SystemZOperand.td.  We could split them into
  // flags -- single/pair, 128-bit, etc. -- but it hardly seems worth it.
  enum DispRange {
    Disp12Only,
    Disp12Pair,
    Disp20Only,
    Disp20Only128,
    Disp20Pair
  };
  DispRange DR;

  // The parts of the address.  The address is equivalent to:
  //
  //   Base + Disp + Index + (IncludesDynAlloc ? ADJDYNALLOC : 0)
  SDValue Base;
  int64_t Disp;
  SDValue Index;
  bool IncludesDynAlloc;

  SystemZAddressingMode(AddrForm form, DispRange dr)
      : Form(form), DR(dr), Disp(0), IncludesDynAlloc(false) {}

  // True if the address can have an index register.
  bool hasIndexField() { return Form != FormBD; }

  // True if the address can (and must) include ADJDYNALLOC.
  bool isDynAlloc() { return Form == FormBDXDynAlloc; }

  // Debug dump of the components matched so far.
  void dump(const llvm::SelectionDAG *DAG) {
    errs() << "SystemZAddressingMode " << this << '\n';

    errs() << " Base ";
    if (Base.getNode())
      Base.getNode()->dump(DAG);
    else
      errs() << "null\n";

    if (hasIndexField()) {
      errs() << " Index ";
      if (Index.getNode())
        Index.getNode()->dump(DAG);
      else
        errs() << "null\n";
    }

    errs() << " Disp " << Disp;
    if (IncludesDynAlloc)
      errs() << " + ADJDYNALLOC";
    errs() << '\n';
  }
};
97 | |
// Return a mask with the low Count bits set.  Count may be anything
// from 0 to 64 inclusive.
static uint64_t allOnes(unsigned int Count) {
  assert(Count <= 64);
  // A 64-bit shift by 64 is undefined behaviour, so the full-width mask
  // has to be produced as a special case.
  return Count < 64 ? (uint64_t(1) << Count) - 1 : UINT64_MAX;
}
105 | |
// Represents operands 2 to 5 of the ROTATE AND ... SELECTED BITS operation
// given by Opcode.  The operands are: Input (R2), Start (I3), End (I4) and
// Rotate (I5).  The combined operand value is effectively:
//
//   (or (rotl Input, Rotate), ~Mask)
//
// for RNSBG and:
//
//   (and (rotl Input, Rotate), Mask)
//
// otherwise.  The output value has BitSize bits, although Input may be
// narrower (in which case the upper bits are don't care), or wider (in which
// case the result will be truncated as part of the operation).
struct RxSBGOperands {
  // Initially the whole rotated value is significant (Mask is all ones for
  // BitSize bits) and no rotation has been folded in.
  RxSBGOperands(unsigned Op, SDValue N)
      : Opcode(Op), BitSize(N.getValueSizeInBits()),
        Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
        Rotate(0) {}

  unsigned Opcode;  // Target opcode of the RxSBG instruction being built.
  unsigned BitSize; // Width of the result in bits.
  uint64_t Mask;    // Significant bits, in post-rotation coordinates
                    // (see refineRxSBGMask).
  SDValue Input;    // Current candidate for operand R2.
  unsigned Start;   // I3: first selected bit position.
  unsigned End;     // I4: last selected bit position.
  unsigned Rotate;  // I5: accumulated left-rotate amount.
};
133 | |
// SystemZ-specific DAG-to-DAG instruction selector.  Most of the private
// select* helpers below are invoked by the TableGen-erated matcher
// (SystemZGenDAGISel.inc) through ComplexPattern definitions in
// SystemZOperands.td.
class SystemZDAGToDAGISel : public SelectionDAGISel {
  const SystemZSubtarget *Subtarget;

  // Used by SystemZOperands.td to create integer constants.
  inline SDValue getImm(const SDNode *Node, uint64_t Imm) const {
    return CurDAG->getTargetConstant(Imm, SDLoc(Node), Node->getValueType(0));
  }

  const SystemZTargetMachine &getTargetMachine() const {
    return static_cast<const SystemZTargetMachine &>(TM);
  }

  const SystemZInstrInfo *getInstrInfo() const {
    return Subtarget->getInstrInfo();
  }

  // Try to fold more of the base or index of AM into AM, where IsBase
  // selects between the base and index.
  bool expandAddress(SystemZAddressingMode &AM, bool IsBase) const;

  // Try to describe N in AM, returning true on success.
  bool selectAddress(SDValue N, SystemZAddressingMode &AM) const;

  // Extract individual target operands from matched address AM.
  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
                          SDValue &Base, SDValue &Disp) const;
  void getAddressOperands(const SystemZAddressingMode &AM, EVT VT,
                          SDValue &Base, SDValue &Disp, SDValue &Index) const;

  // Try to match Addr as a FormBD address with displacement type DR.
  // Return true on success, storing the base and displacement in
  // Base and Disp respectively.
  bool selectBDAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
                    SDValue &Base, SDValue &Disp) const;

  // Try to match Addr as a FormBDX address with displacement type DR.
  // Return true on success and if the result had no index.  Store the
  // base and displacement in Base and Disp respectively.
  bool selectMVIAddr(SystemZAddressingMode::DispRange DR, SDValue Addr,
                     SDValue &Base, SDValue &Disp) const;

  // Try to match Addr as a FormBDX* address of form Form with
  // displacement type DR.  Return true on success, storing the base,
  // displacement and index in Base, Disp and Index respectively.
  bool selectBDXAddr(SystemZAddressingMode::AddrForm Form,
                     SystemZAddressingMode::DispRange DR, SDValue Addr,
                     SDValue &Base, SDValue &Disp, SDValue &Index) const;

  // PC-relative address matching routines used by SystemZOperands.td.
  bool selectPCRelAddress(SDValue Addr, SDValue &Target) const {
    if (SystemZISD::isPCREL(Addr.getOpcode())) {
      Target = Addr.getOperand(0);
      return true;
    }
    return false;
  }

  // BD matching routines used by SystemZOperands.td.
  bool selectBDAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp12Only, Addr, Base, Disp);
  }
  bool selectBDAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
  }
  bool selectBDAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp20Only, Addr, Base, Disp);
  }
  bool selectBDAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectBDAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
  }

  // MVI matching routines used by SystemZOperands.td.
  bool selectMVIAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectMVIAddr(SystemZAddressingMode::Disp12Pair, Addr, Base, Disp);
  }
  bool selectMVIAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp) const {
    return selectMVIAddr(SystemZAddressingMode::Disp20Pair, Addr, Base, Disp);
  }

  // BDX matching routines used by SystemZOperands.td.
  bool selectBDXAddr12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp12Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp12Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectDynAlloc12Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                            SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXDynAlloc,
                         SystemZAddressingMode::Disp12Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Only(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Only,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Only128(SDValue Addr, SDValue &Base, SDValue &Disp,
                              SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Only128,
                         Addr, Base, Disp, Index);
  }
  bool selectBDXAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                           SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXNormal,
                         SystemZAddressingMode::Disp20Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectLAAddr12Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                          SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
                         SystemZAddressingMode::Disp12Pair,
                         Addr, Base, Disp, Index);
  }
  bool selectLAAddr20Pair(SDValue Addr, SDValue &Base, SDValue &Disp,
                          SDValue &Index) const {
    return selectBDXAddr(SystemZAddressingMode::FormBDXLA,
                         SystemZAddressingMode::Disp20Pair,
                         Addr, Base, Disp, Index);
  }

  // Try to match Addr as an address with a base, 12-bit displacement
  // and index, where the index is element Elem of a vector.
  // Return true on success, storing the base, displacement and vector
  // in Base, Disp and Index respectively.
  bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
                           SDValue &Disp, SDValue &Index) const;

  // Check whether (or Op (and X InsertMask)) is effectively an insertion
  // of X into bits InsertMask of some Y != Op.  Return true if so and
  // set Op to that Y.
  bool detectOrAndInsertion(SDValue &Op, uint64_t InsertMask) const;

  // Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
  // Return true on success.
  bool refineRxSBGMask(RxSBGOperands &RxSBG, uint64_t Mask) const;

  // Try to fold some of RxSBG.Input into other fields of RxSBG.
  // Return true on success.
  bool expandRxSBG(RxSBGOperands &RxSBG) const;

  // Return an undefined value of type VT.
  SDValue getUNDEF(const SDLoc &DL, EVT VT) const;

  // Convert N to VT, if it isn't already.
  SDValue convertTo(const SDLoc &DL, EVT VT, SDValue N) const;

  // Try to implement AND or shift node N using RISBG with the zero flag set.
  // Return true on success (the node has then been selected).
  bool tryRISBGZero(SDNode *N);

  // Try to use RISBG or Opcode to implement OR or XOR node N.
  // Return true on success (the node has then been selected).
  bool tryRxSBG(SDNode *N, unsigned Opcode);

  // If Op0 is null, then Node is a constant that can be loaded using:
  //
  //   (Opcode UpperVal LowerVal)
  //
  // If Op0 is nonnull, then Node can be implemented using:
  //
  //   (Opcode (Opcode Op0 UpperVal) LowerVal)
  void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
                           uint64_t UpperVal, uint64_t LowerVal);

  // Replace Node using the vector-constant description VCI.
  void loadVectorConstant(const SystemZVectorConstantInfo &VCI,
                          SDNode *Node);

  // Materialize the vector constant Val of type VT via the constant pool.
  SDNode *loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL);

  // Try to use gather instruction Opcode to implement vector insertion N.
  bool tryGather(SDNode *N, unsigned Opcode);

  // Try to use scatter instruction Opcode to implement store Store.
  bool tryScatter(StoreSDNode *Store, unsigned Opcode);

  // Change a chain of {load; op; store} of the same value into a simple op
  // through memory of that value, if the uses of the modified value and its
  // address are suitable.
  bool tryFoldLoadStoreIntoMemOperand(SDNode *Node);

  // Return true if Load and Store are loads and stores of the same size
  // and are guaranteed not to overlap.  Such operations can be implemented
  // using block (SS-format) instructions.
  //
  // Partial overlap would lead to incorrect code, since the block operations
  // are logically bytewise, even though they have a fast path for the
  // non-overlapping case.  We also need to avoid full overlap (i.e. two
  // addresses that might be equal at run time) because although that case
  // would be handled correctly, it might be implemented by millicode.
  bool canUseBlockOperation(StoreSDNode *Store, LoadSDNode *Load) const;

  // N is a (store (load Y), X) pattern.  Return true if it can use an MVC
  // from Y to X.
  bool storeLoadCanUseMVC(SDNode *N) const;

  // N is a (store (op (load A[0]), (load A[1])), X) pattern.  Return true
  // if A[1 - I] == X and if N can use a block operation like NC from A[I]
  // to X.
  bool storeLoadCanUseBlockBinary(SDNode *N, unsigned I) const;

  // Return true if N (a load or a store) fulfills the alignment
  // requirements for a PC-relative access.
  bool storeLoadIsAligned(SDNode *N) const;

  // Return the load extension type of a load or atomic load.
  ISD::LoadExtType getLoadExtType(SDNode *N) const;

  // Try to expand a boolean SELECT_CCMASK using an IPM sequence.
  SDValue expandSelectBoolean(SDNode *Node);

public:
  // Legacy pass-manager identification.
  static char ID;

  SystemZDAGToDAGISel() = delete;

  SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(ID, TM, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    const Function &F = MF.getFunction();
    // mnop-mcount and mrecord-mcount are only meaningful together with
    // fentry-call; reject inconsistent attribute combinations up front.
    if (F.getFnAttribute("fentry-call").getValueAsString() != "true") {
      if (F.hasFnAttribute("mnop-mcount"))
        report_fatal_error("mnop-mcount only supported with fentry-call");
      if (F.hasFnAttribute("mrecord-mcount"))
        report_fatal_error("mrecord-mcount only supported with fentry-call");
    }

    Subtarget = &MF.getSubtarget<SystemZSubtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  // Override SelectionDAGISel.
  void Select(SDNode *Node) override;
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;
  bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;
  void PreprocessISelDAG() override;

  // Include the pieces autogenerated from the target description.
#include "SystemZGenDAGISel.inc"
};
385 | } // end anonymous namespace |
386 | |
// Legacy pass-manager identification for the selector pass.
char SystemZDAGToDAGISel::ID = 0;

INITIALIZE_PASS(SystemZDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
390 | |
// Factory used by SystemZTargetMachine to create the instruction selector
// for the given optimization level.
FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new SystemZDAGToDAGISel(TM, OptLevel);
}
395 | |
396 | // Return true if Val should be selected as a displacement for an address |
397 | // with range DR. Here we're interested in the range of both the instruction |
398 | // described by DR and of any pairing instruction. |
399 | static bool selectDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { |
400 | switch (DR) { |
401 | case SystemZAddressingMode::Disp12Only: |
402 | return isUInt<12>(x: Val); |
403 | |
404 | case SystemZAddressingMode::Disp12Pair: |
405 | case SystemZAddressingMode::Disp20Only: |
406 | case SystemZAddressingMode::Disp20Pair: |
407 | return isInt<20>(x: Val); |
408 | |
409 | case SystemZAddressingMode::Disp20Only128: |
410 | return isInt<20>(x: Val) && isInt<20>(x: Val + 8); |
411 | } |
412 | llvm_unreachable("Unhandled displacement range" ); |
413 | } |
414 | |
415 | // Change the base or index in AM to Value, where IsBase selects |
416 | // between the base and index. |
417 | static void changeComponent(SystemZAddressingMode &AM, bool IsBase, |
418 | SDValue Value) { |
419 | if (IsBase) |
420 | AM.Base = Value; |
421 | else |
422 | AM.Index = Value; |
423 | } |
424 | |
425 | // The base or index of AM is equivalent to Value + ADJDYNALLOC, |
426 | // where IsBase selects between the base and index. Try to fold the |
427 | // ADJDYNALLOC into AM. |
428 | static bool expandAdjDynAlloc(SystemZAddressingMode &AM, bool IsBase, |
429 | SDValue Value) { |
430 | if (AM.isDynAlloc() && !AM.IncludesDynAlloc) { |
431 | changeComponent(AM, IsBase, Value); |
432 | AM.IncludesDynAlloc = true; |
433 | return true; |
434 | } |
435 | return false; |
436 | } |
437 | |
438 | // The base of AM is equivalent to Base + Index. Try to use Index as |
439 | // the index register. |
440 | static bool expandIndex(SystemZAddressingMode &AM, SDValue Base, |
441 | SDValue Index) { |
442 | if (AM.hasIndexField() && !AM.Index.getNode()) { |
443 | AM.Base = Base; |
444 | AM.Index = Index; |
445 | return true; |
446 | } |
447 | return false; |
448 | } |
449 | |
450 | // The base or index of AM is equivalent to Op0 + Op1, where IsBase selects |
451 | // between the base and index. Try to fold Op1 into AM's displacement. |
452 | static bool expandDisp(SystemZAddressingMode &AM, bool IsBase, |
453 | SDValue Op0, uint64_t Op1) { |
454 | // First try adjusting the displacement. |
455 | int64_t TestDisp = AM.Disp + Op1; |
456 | if (selectDisp(DR: AM.DR, Val: TestDisp)) { |
457 | changeComponent(AM, IsBase, Value: Op0); |
458 | AM.Disp = TestDisp; |
459 | return true; |
460 | } |
461 | |
462 | // We could consider forcing the displacement into a register and |
463 | // using it as an index, but it would need to be carefully tuned. |
464 | return false; |
465 | } |
466 | |
// Try to fold more of the base or index of AM into AM's displacement,
// index or ADJDYNALLOC slot, where IsBase selects between the base and
// index.  Returns true if any folding took place.
bool SystemZDAGToDAGISel::expandAddress(SystemZAddressingMode &AM,
                                        bool IsBase) const {
  SDValue N = IsBase ? AM.Base : AM.Index;
  unsigned Opcode = N.getOpcode();
  // Look through no-op truncations.
  if (Opcode == ISD::TRUNCATE && N.getOperand(0).getValueSizeInBits() <= 64) {
    N = N.getOperand(0);
    Opcode = N.getOpcode();
  }
  if (Opcode == ISD::ADD || CurDAG->isBaseWithConstantOffset(N)) {
    SDValue Op0 = N.getOperand(0);
    SDValue Op1 = N.getOperand(1);

    unsigned Op0Code = Op0->getOpcode();
    unsigned Op1Code = Op1->getOpcode();

    // An ADJDYNALLOC addend is absorbed into the addressing mode itself.
    if (Op0Code == SystemZISD::ADJDYNALLOC)
      return expandAdjDynAlloc(AM, IsBase, Op1);
    if (Op1Code == SystemZISD::ADJDYNALLOC)
      return expandAdjDynAlloc(AM, IsBase, Op0);

    // A constant addend is folded into the displacement if it fits.
    if (Op0Code == ISD::Constant)
      return expandDisp(AM, IsBase, Op1,
                        cast<ConstantSDNode>(Op0)->getSExtValue());
    if (Op1Code == ISD::Constant)
      return expandDisp(AM, IsBase, Op0,
                        cast<ConstantSDNode>(Op1)->getSExtValue());

    // Otherwise try splitting a base-side addition into base + index.
    if (IsBase && expandIndex(AM, Op0, Op1))
      return true;
  }
  // PCREL_OFFSET carries the full target address and an anchor address;
  // the difference of their offsets can go into the displacement.
  if (Opcode == SystemZISD::PCREL_OFFSET) {
    SDValue Full = N.getOperand(0);
    SDValue Base = N.getOperand(1);
    SDValue Anchor = Base.getOperand(0);
    uint64_t Offset = (cast<GlobalAddressSDNode>(Full)->getOffset() -
                       cast<GlobalAddressSDNode>(Anchor)->getOffset());
    return expandDisp(AM, IsBase, Base, Offset);
  }
  return false;
}
508 | |
509 | // Return true if an instruction with displacement range DR should be |
510 | // used for displacement value Val. selectDisp(DR, Val) must already hold. |
511 | static bool isValidDisp(SystemZAddressingMode::DispRange DR, int64_t Val) { |
512 | assert(selectDisp(DR, Val) && "Invalid displacement" ); |
513 | switch (DR) { |
514 | case SystemZAddressingMode::Disp12Only: |
515 | case SystemZAddressingMode::Disp20Only: |
516 | case SystemZAddressingMode::Disp20Only128: |
517 | return true; |
518 | |
519 | case SystemZAddressingMode::Disp12Pair: |
520 | // Use the other instruction if the displacement is too large. |
521 | return isUInt<12>(x: Val); |
522 | |
523 | case SystemZAddressingMode::Disp20Pair: |
524 | // Use the other instruction if the displacement is small enough. |
525 | return !isUInt<12>(x: Val); |
526 | } |
527 | llvm_unreachable("Unhandled displacement range" ); |
528 | } |
529 | |
530 | // Return true if Base + Disp + Index should be performed by LA(Y). |
531 | static bool shouldUseLA(SDNode *Base, int64_t Disp, SDNode *Index) { |
532 | // Don't use LA(Y) for constants. |
533 | if (!Base) |
534 | return false; |
535 | |
536 | // Always use LA(Y) for frame addresses, since we know that the destination |
537 | // register is almost always (perhaps always) going to be different from |
538 | // the frame register. |
539 | if (Base->getOpcode() == ISD::FrameIndex) |
540 | return true; |
541 | |
542 | if (Disp) { |
543 | // Always use LA(Y) if there is a base, displacement and index. |
544 | if (Index) |
545 | return true; |
546 | |
547 | // Always use LA if the displacement is small enough. It should always |
548 | // be no worse than AGHI (and better if it avoids a move). |
549 | if (isUInt<12>(x: Disp)) |
550 | return true; |
551 | |
552 | // For similar reasons, always use LAY if the constant is too big for AGHI. |
553 | // LAY should be no worse than AGFI. |
554 | if (!isInt<16>(x: Disp)) |
555 | return true; |
556 | } else { |
557 | // Don't use LA for plain registers. |
558 | if (!Index) |
559 | return false; |
560 | |
561 | // Don't use LA for plain addition if the index operand is only used |
562 | // once. It should be a natural two-operand addition in that case. |
563 | if (Index->hasOneUse()) |
564 | return false; |
565 | |
566 | // Prefer addition if the second operation is sign-extended, in the |
567 | // hope of using AGF. |
568 | unsigned IndexOpcode = Index->getOpcode(); |
569 | if (IndexOpcode == ISD::SIGN_EXTEND || |
570 | IndexOpcode == ISD::SIGN_EXTEND_INREG) |
571 | return false; |
572 | } |
573 | |
574 | // Don't use LA for two-operand addition if either operand is only |
575 | // used once. The addition instructions are better in that case. |
576 | if (Base->hasOneUse()) |
577 | return false; |
578 | |
579 | return true; |
580 | } |
581 | |
// Return true if Addr is suitable for AM, updating AM if so.
bool SystemZDAGToDAGISel::selectAddress(SDValue Addr,
                                        SystemZAddressingMode &AM) const {
  // Start out assuming that the address will need to be loaded separately,
  // then try to extend it as much as we can.
  AM.Base = Addr;

  // First try treating the address as a constant.
  if (Addr.getOpcode() == ISD::Constant &&
      expandDisp(AM, true, SDValue(),
                 cast<ConstantSDNode>(Addr)->getSExtValue()))
    ;
  // Also see if it's a bare ADJDYNALLOC.
  else if (Addr.getOpcode() == SystemZISD::ADJDYNALLOC &&
           expandAdjDynAlloc(AM, true, SDValue()))
    ;
  else
    // Otherwise try expanding each component in turn until no further
    // folding is possible.
    while (expandAddress(AM, true) ||
           (AM.Index.getNode() && expandAddress(AM, false)))
      continue;

  // Reject cases where it isn't profitable to use LA(Y).
  if (AM.Form == SystemZAddressingMode::FormBDXLA &&
      !shouldUseLA(AM.Base.getNode(), AM.Disp, AM.Index.getNode()))
    return false;

  // Reject cases where the other instruction in a pair should be used.
  if (!isValidDisp(AM.DR, AM.Disp))
    return false;

  // Make sure that ADJDYNALLOC is included where necessary.
  if (AM.isDynAlloc() && !AM.IncludesDynAlloc)
    return false;

  LLVM_DEBUG(AM.dump(CurDAG));
  return true;
}
620 | |
// Insert a node into the DAG at least before Pos.  This will reposition
// the node as needed, and will assign it a node ID that is <= Pos's ID.
// Note that this does *not* preserve the uniqueness of node IDs!
// The selection DAG must no longer depend on their uniqueness when this
// function is used.
static void insertDAGNode(SelectionDAG *DAG, SDNode *Pos, SDValue N) {
  // Only reposition if N has no ID yet or currently sits after Pos in
  // the topological order.
  if (N->getNodeId() == -1 ||
      (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) >
       SelectionDAGISel::getUninvalidatedNodeId(Pos))) {
    DAG->RepositionNode(Pos->getIterator(), N.getNode());
    // Mark Node as invalid for pruning as after this it may be a successor to a
    // selected node but otherwise be in the same position of Pos.
    // Conservatively mark it with the same -abs(Id) to assure node id
    // invariant is preserved.
    N->setNodeId(Pos->getNodeId());
    SelectionDAGISel::InvalidateNodeId(N.getNode());
  }
}
639 | |
// Produce the base-register and displacement operands for matched address
// AM, with both converted to type VT (i64 for normal addresses, i32 for
// shift counts).
void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM,
                                             EVT VT, SDValue &Base,
                                             SDValue &Disp) const {
  Base = AM.Base;
  if (!Base.getNode())
    // Register 0 means "no base".  This is mostly useful for shifts.
    Base = CurDAG->getRegister(0, VT);
  else if (Base.getOpcode() == ISD::FrameIndex) {
    // Lower a FrameIndex to a TargetFrameIndex.
    int64_t FrameIndex = cast<FrameIndexSDNode>(Base)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FrameIndex, VT);
  } else if (Base.getValueType() != VT) {
    // Truncate values from i64 to i32, for shifts.
    assert(VT == MVT::i32 && Base.getValueType() == MVT::i64 &&
           "Unexpected truncation");
    SDLoc DL(Base);
    SDValue Trunc = CurDAG->getNode(ISD::TRUNCATE, DL, VT, Base);
    // Keep the new truncate topologically before its user.
    insertDAGNode(CurDAG, Base.getNode(), Trunc);
    Base = Trunc;
  }

  // Lower the displacement to a TargetConstant.
  Disp = CurDAG->getTargetConstant(AM.Disp, SDLoc(Base), VT);
}
664 | |
665 | void SystemZDAGToDAGISel::getAddressOperands(const SystemZAddressingMode &AM, |
666 | EVT VT, SDValue &Base, |
667 | SDValue &Disp, |
668 | SDValue &Index) const { |
669 | getAddressOperands(AM, VT, Base, Disp); |
670 | |
671 | Index = AM.Index; |
672 | if (!Index.getNode()) |
673 | // Register 0 means "no index". |
674 | Index = CurDAG->getRegister(Reg: 0, VT); |
675 | } |
676 | |
677 | bool SystemZDAGToDAGISel::selectBDAddr(SystemZAddressingMode::DispRange DR, |
678 | SDValue Addr, SDValue &Base, |
679 | SDValue &Disp) const { |
680 | SystemZAddressingMode AM(SystemZAddressingMode::FormBD, DR); |
681 | if (!selectAddress(Addr, AM)) |
682 | return false; |
683 | |
684 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp); |
685 | return true; |
686 | } |
687 | |
688 | bool SystemZDAGToDAGISel::selectMVIAddr(SystemZAddressingMode::DispRange DR, |
689 | SDValue Addr, SDValue &Base, |
690 | SDValue &Disp) const { |
691 | SystemZAddressingMode AM(SystemZAddressingMode::FormBDXNormal, DR); |
692 | if (!selectAddress(Addr, AM) || AM.Index.getNode()) |
693 | return false; |
694 | |
695 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp); |
696 | return true; |
697 | } |
698 | |
699 | bool SystemZDAGToDAGISel::selectBDXAddr(SystemZAddressingMode::AddrForm Form, |
700 | SystemZAddressingMode::DispRange DR, |
701 | SDValue Addr, SDValue &Base, |
702 | SDValue &Disp, SDValue &Index) const { |
703 | SystemZAddressingMode AM(Form, DR); |
704 | if (!selectAddress(Addr, AM)) |
705 | return false; |
706 | |
707 | getAddressOperands(AM, VT: Addr.getValueType(), Base, Disp, Index); |
708 | return true; |
709 | } |
710 | |
// Try to match Addr as an address with a base, 12-bit displacement and an
// index taken from element Elem of a vector.  On success, Base gets the
// scalar register, Index the index *vector*, and Disp the displacement.
bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
                                              SDValue &Base,
                                              SDValue &Disp,
                                              SDValue &Index) const {
  SDValue Regs[2];
  if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
      Regs[0].getNode() && Regs[1].getNode()) {
    // Either matched register could be the vector-element operand, so try
    // both assignments of base and index.
    for (unsigned int I = 0; I < 2; ++I) {
      Base = Regs[I];
      Index = Regs[1 - I];
      // We can't tell here whether the index vector has the right type
      // for the access; the caller needs to do that instead.
      if (Index.getOpcode() == ISD::ZERO_EXTEND)
        Index = Index.getOperand(0);
      if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          Index.getOperand(1) == Elem) {
        Index = Index.getOperand(0);
        return true;
      }
    }
  }
  return false;
}
734 | |
// Check whether (or Op (and X InsertMask)) is effectively an insertion
// of X into bits InsertMask of some Y != Op.  Return true if so and
// set Op to that Y.
bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
                                               uint64_t InsertMask) const {
  // We're only interested in cases where the insertion is into some operand
  // of Op, rather than into Op itself.  The only useful case is an AND.
  if (Op.getOpcode() != ISD::AND)
    return false;

  // We need a constant mask.
  auto *MaskNode = dyn_cast<ConstantSDNode>(Op.getOperand(1).getNode());
  if (!MaskNode)
    return false;

  // It's not an insertion of Op.getOperand(0) if the two masks overlap.
  uint64_t AndMask = MaskNode->getZExtValue();
  if (InsertMask & AndMask)
    return false;

  // It's only an insertion if all bits are covered or are known to be zero.
  // The inner check covers all cases but is more expensive.
  uint64_t Used = allOnes(Op.getValueSizeInBits());
  if (Used != (AndMask | InsertMask)) {
    KnownBits Known = CurDAG->computeKnownBits(Op.getOperand(0));
    if (Used != (AndMask | InsertMask | Known.Zero.getZExtValue()))
      return false;
  }

  Op = Op.getOperand(0);
  return true;
}
764 | |
// Try to update RxSBG so that only the bits of RxSBG.Input in Mask are used.
// Return true on success.
bool SystemZDAGToDAGISel::refineRxSBGMask(RxSBGOperands &RxSBG,
                                          uint64_t Mask) const {
  const SystemZInstrInfo *TII = getInstrInfo();
  // Rotate Mask into the same (post-rotation) coordinates as RxSBG.Mask.
  // The Rotate != 0 guard avoids an undefined 64-bit shift by 64.
  if (RxSBG.Rotate != 0)
    Mask = (Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate));
  Mask &= RxSBG.Mask;
  // Only accept the refinement if the combined mask is still encodable by
  // an RxSBG instruction; isRxSBGMask updates Start/End accordingly.
  if (TII->isRxSBGMask(Mask, RxSBG.BitSize, RxSBG.Start, RxSBG.End)) {
    RxSBG.Mask = Mask;
    return true;
  }
  return false;
}
777 | |
778 | // Return true if any bits of (RxSBG.Input & Mask) are significant. |
779 | static bool maskMatters(RxSBGOperands &RxSBG, uint64_t Mask) { |
780 | // Rotate the mask in the same way as RxSBG.Input is rotated. |
781 | if (RxSBG.Rotate != 0) |
782 | Mask = ((Mask << RxSBG.Rotate) | (Mask >> (64 - RxSBG.Rotate))); |
783 | return (Mask & RxSBG.Mask) != 0; |
784 | } |
785 | |
// Try to fold the node producing RxSBG.Input into the R[INOX]SBG
// parameters (Mask, Start, End, Rotate).  On success, RxSBG.Input is
// advanced to the folded node's operand and true is returned; the caller
// iterates until this returns false.
bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
  SDValue N = RxSBG.Input;
  unsigned Opcode = N.getOpcode();
  switch (Opcode) {
  case ISD::TRUNCATE: {
    // RNSBG ANDs with the rotated input; only OR operands can be folded
    // there (see the ISD::OR case below), so reject a truncation.
    if (RxSBG.Opcode == SystemZ::RNSBG)
      return false;
    // A source wider than 64 bits cannot be modeled by the 64-bit rotate.
    if (N.getOperand(i: 0).getValueSizeInBits() > 64)
      return false;
    // Model the truncation as an AND with the narrow type's all-ones mask.
    uint64_t BitSize = N.getValueSizeInBits();
    uint64_t Mask = allOnes(Count: BitSize);
    if (!refineRxSBGMask(RxSBG, Mask))
      return false;
    RxSBG.Input = N.getOperand(i: 0);
    return true;
  }
  case ISD::AND: {
    // An AND mask cannot be folded into RNSBG (which ANDs itself).
    if (RxSBG.Opcode == SystemZ::RNSBG)
      return false;

    auto *MaskNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode());
    if (!MaskNode)
      return false;

    SDValue Input = N.getOperand(i: 0);
    uint64_t Mask = MaskNode->getZExtValue();
    if (!refineRxSBGMask(RxSBG, Mask)) {
      // If some bits of Input are already known zeros, those bits will have
      // been removed from the mask. See if adding them back in makes the
      // mask suitable.
      KnownBits Known = CurDAG->computeKnownBits(Op: Input);
      Mask |= Known.Zero.getZExtValue();
      if (!refineRxSBGMask(RxSBG, Mask))
        return false;
    }
    RxSBG.Input = Input;
    return true;
  }

  case ISD::OR: {
    // An OR can only be folded into RNSBG: bits set in the OR constant
    // become ones, the identity for RNSBG's AND, so only the remaining
    // (~constant) bits still depend on Input.
    if (RxSBG.Opcode != SystemZ::RNSBG)
      return false;

    auto *MaskNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode());
    if (!MaskNode)
      return false;

    SDValue Input = N.getOperand(i: 0);
    uint64_t Mask = ~MaskNode->getZExtValue();
    if (!refineRxSBGMask(RxSBG, Mask)) {
      // If some bits of Input are already known ones, those bits will have
      // been removed from the mask. See if adding them back in makes the
      // mask suitable.
      KnownBits Known = CurDAG->computeKnownBits(Op: Input);
      Mask &= ~Known.One.getZExtValue();
      if (!refineRxSBGMask(RxSBG, Mask))
        return false;
    }
    RxSBG.Input = Input;
    return true;
  }

  case ISD::ROTL: {
    // Any 64-bit rotate left can be merged into the RxSBG.
    if (RxSBG.BitSize != 64 || N.getValueType() != MVT::i64)
      return false;
    auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode());
    if (!CountNode)
      return false;

    // Accumulate the rotate amount modulo 64.
    RxSBG.Rotate = (RxSBG.Rotate + CountNode->getZExtValue()) & 63;
    RxSBG.Input = N.getOperand(i: 0);
    return true;
  }

  case ISD::ANY_EXTEND:
    // Bits above the extended operand are don't-care.
    RxSBG.Input = N.getOperand(i: 0);
    return true;

  case ISD::ZERO_EXTEND:
    if (RxSBG.Opcode != SystemZ::RNSBG) {
      // Restrict the mask to the extended operand.
      unsigned InnerBitSize = N.getOperand(i: 0).getValueSizeInBits();
      if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: InnerBitSize)))
        return false;

      RxSBG.Input = N.getOperand(i: 0);
      return true;
    }
    // For RNSBG, fall through and require the extension bits to be
    // don't-care, just like a sign extension.
    [[fallthrough]];

  case ISD::SIGN_EXTEND: {
    // Check that the extension bits are don't-care (i.e. are masked out
    // by the final mask).
    unsigned BitSize = N.getValueSizeInBits();
    unsigned InnerBitSize = N.getOperand(i: 0).getValueSizeInBits();
    if (maskMatters(RxSBG, Mask: allOnes(Count: BitSize) - allOnes(Count: InnerBitSize))) {
      // In the case where only the sign bit is active, increase Rotate with
      // the extension width.
      if (RxSBG.Mask == 1 && RxSBG.Rotate == 1)
        RxSBG.Rotate += (BitSize - InnerBitSize);
      else
        return false;
    }

    RxSBG.Input = N.getOperand(i: 0);
    return true;
  }

  case ISD::SHL: {
    auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode());
    if (!CountNode)
      return false;

    // Only non-zero, in-range shift amounts can be folded.
    uint64_t Count = CountNode->getZExtValue();
    unsigned BitSize = N.getValueSizeInBits();
    if (Count < 1 || Count >= BitSize)
      return false;

    if (RxSBG.Opcode == SystemZ::RNSBG) {
      // Treat (shl X, count) as (rotl X, size-count) as long as the bottom
      // count bits from RxSBG.Input are ignored.
      if (maskMatters(RxSBG, Mask: allOnes(Count)))
        return false;
    } else {
      // Treat (shl X, count) as (and (rotl X, count), ~0<<count).
      if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: BitSize - Count) << Count))
        return false;
    }

    RxSBG.Rotate = (RxSBG.Rotate + Count) & 63;
    RxSBG.Input = N.getOperand(i: 0);
    return true;
  }

  case ISD::SRL:
  case ISD::SRA: {
    auto *CountNode = dyn_cast<ConstantSDNode>(Val: N.getOperand(i: 1).getNode());
    if (!CountNode)
      return false;

    // Only non-zero, in-range shift amounts can be folded.
    uint64_t Count = CountNode->getZExtValue();
    unsigned BitSize = N.getValueSizeInBits();
    if (Count < 1 || Count >= BitSize)
      return false;

    if (RxSBG.Opcode == SystemZ::RNSBG || Opcode == ISD::SRA) {
      // Treat (srl|sra X, count) as (rotl X, size-count) as long as the top
      // count bits from RxSBG.Input are ignored.
      if (maskMatters(RxSBG, Mask: allOnes(Count) << (BitSize - Count)))
        return false;
    } else {
      // Treat (srl X, count), mask) as (and (rotl X, size-count), ~0>>count),
      // which is similar to SLL above.
      if (!refineRxSBGMask(RxSBG, Mask: allOnes(Count: BitSize - Count)))
        return false;
    }

    RxSBG.Rotate = (RxSBG.Rotate - Count) & 63;
    RxSBG.Input = N.getOperand(i: 0);
    return true;
  }
  default:
    // No other producers can be folded.
    return false;
  }
}
953 | |
954 | SDValue SystemZDAGToDAGISel::getUNDEF(const SDLoc &DL, EVT VT) const { |
955 | SDNode *N = CurDAG->getMachineNode(Opcode: TargetOpcode::IMPLICIT_DEF, dl: DL, VT); |
956 | return SDValue(N, 0); |
957 | } |
958 | |
959 | SDValue SystemZDAGToDAGISel::convertTo(const SDLoc &DL, EVT VT, |
960 | SDValue N) const { |
961 | if (N.getValueType() == MVT::i32 && VT == MVT::i64) |
962 | return CurDAG->getTargetInsertSubreg(SystemZ::SRIdx: subreg_l32, |
963 | DL, VT, Operand: getUNDEF(DL, MVT::VT: i64), Subreg: N); |
964 | if (N.getValueType() == MVT::i64 && VT == MVT::i32) |
965 | return CurDAG->getTargetExtractSubreg(SystemZ::SRIdx: subreg_l32, DL, VT, Operand: N); |
966 | assert(N.getValueType() == VT && "Unexpected value types" ); |
967 | return N; |
968 | } |
969 | |
// Try to implement node N as a RISBG that zeroes all unselected bits
// (rotate then insert selected bits), or as a plain AND when that is
// preferable.  Returns true if N was replaced.
bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  // RISBG only works on integers of at most 64 bits.
  if (!VT.isInteger() || VT.getSizeInBits() > 64)
    return false;
  RxSBGOperands RISBG(SystemZ::RISBG, SDValue(N, 0));
  // Fold as many producing nodes as possible, counting how many real
  // operations the RISBG would replace.
  unsigned Count = 0;
  while (expandRxSBG(RxSBG&: RISBG))
    // The widening or narrowing is expected to be free.
    // Counting widening or narrowing as a saved operation will result in
    // preferring an R*SBG over a simple shift/logical instruction.
    if (RISBG.Input.getOpcode() != ISD::ANY_EXTEND &&
        RISBG.Input.getOpcode() != ISD::TRUNCATE)
      Count += 1;
  // Give up if nothing was folded, or if we would just be wrapping a
  // constant.
  if (Count == 0 || isa<ConstantSDNode>(Val: RISBG.Input))
    return false;

  // Prefer to use normal shift instructions over RISBG, since they can handle
  // all cases and are sometimes shorter.
  if (Count == 1 && N->getOpcode() != ISD::AND)
    return false;

  // Prefer register extensions like LLC over RISBG. Also prefer to start
  // out with normal ANDs if one instruction would be enough. We can convert
  // these ANDs into an RISBG later if a three-address instruction is useful.
  if (RISBG.Rotate == 0) {
    bool PreferAnd = false;
    // Prefer AND for any 32-bit and-immediate operation.
    if (VT == MVT::i32)
      PreferAnd = true;
    // As well as for any 64-bit operation that can be implemented via LLC(R),
    // LLH(R), LLGT(R), or one of the and-immediate instructions.
    else if (RISBG.Mask == 0xff ||
             RISBG.Mask == 0xffff ||
             RISBG.Mask == 0x7fffffff ||
             SystemZ::isImmLF(Val: ~RISBG.Mask) ||
             SystemZ::isImmHF(Val: ~RISBG.Mask))
      PreferAnd = true;
    // And likewise for the LLZRGF instruction, which doesn't have a register
    // to register version.
    else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) {
      if (Load->getMemoryVT() == MVT::i32 &&
          (Load->getExtensionType() == ISD::EXTLOAD ||
           Load->getExtensionType() == ISD::ZEXTLOAD) &&
          RISBG.Mask == 0xffffff00 &&
          Subtarget->hasLoadAndZeroRightmostByte())
        PreferAnd = true;
    }
    if (PreferAnd) {
      // Replace the current node with an AND. Note that the current node
      // might already be that same AND, in which case it is already CSE'd
      // with it, and we must not call ReplaceNode.
      SDValue In = convertTo(DL, VT, N: RISBG.Input);
      SDValue Mask = CurDAG->getConstant(Val: RISBG.Mask, DL, VT);
      SDValue New = CurDAG->getNode(Opcode: ISD::AND, DL, VT, N1: In, N2: Mask);
      if (N != New.getNode()) {
        insertDAGNode(DAG: CurDAG, Pos: N, N: Mask);
        insertDAGNode(DAG: CurDAG, Pos: N, N: New);
        ReplaceNode(F: N, T: New.getNode());
        N = New.getNode();
      }
      // Now, select the machine opcode to implement this operation.
      if (!N->isMachineOpcode())
        SelectCode(N);
      return true;
    }
  }

  unsigned Opcode = SystemZ::RISBG;
  // Prefer RISBGN if available, since it does not clobber CC.
  if (Subtarget->hasMiscellaneousExtensions())
    Opcode = SystemZ::RISBGN;
  EVT OpcodeVT = MVT::i64;
  if (VT == MVT::i32 && Subtarget->hasHighWord() &&
      // We can only use the 32-bit instructions if all source bits are
      // in the low 32 bits without wrapping, both after rotation (because
      // of the smaller range for Start and End) and before rotation
      // (because the input value is truncated).
      RISBG.Start >= 32 && RISBG.End >= RISBG.Start &&
      ((RISBG.Start + RISBG.Rotate) & 63) >= 32 &&
      ((RISBG.End + RISBG.Rotate) & 63) >=
      ((RISBG.Start + RISBG.Rotate) & 63)) {
    Opcode = SystemZ::RISBMux;
    OpcodeVT = MVT::i32;
    RISBG.Start &= 31;
    RISBG.End &= 31;
  }
  // Operands: insert-into value (undef, since unselected bits are
  // zeroed), rotated source, start bit, end bit (high bit set to request
  // zeroing), rotate amount.
  SDValue Ops[5] = {
    getUNDEF(DL, OpcodeVT),
    convertTo(DL, OpcodeVT, RISBG.Input),
    CurDAG->getTargetConstant(RISBG.Start, DL, MVT::i32),
    CurDAG->getTargetConstant(RISBG.End | 128, DL, MVT::i32),
    CurDAG->getTargetConstant(RISBG.Rotate, DL, MVT::i32)
  };
  SDValue New = convertTo(
      DL, VT, N: SDValue(CurDAG->getMachineNode(Opcode, dl: DL, VT: OpcodeVT, Ops), 0));
  ReplaceNode(F: N, T: New.getNode());
  return true;
}
1069 | |
// Try to implement the binary logic node N as an RxSBG instruction of the
// given machine Opcode (e.g. ROSBG for OR, RXSBG for XOR).  Returns true
// if N was replaced.
bool SystemZDAGToDAGISel::tryRxSBG(SDNode *N, unsigned Opcode) {
  SDLoc DL(N);
  EVT VT = N->getValueType(ResNo: 0);
  // RxSBG only works on integers of at most 64 bits.
  if (!VT.isInteger() || VT.getSizeInBits() > 64)
    return false;
  // Try treating each operand of N as the second operand of the RxSBG
  // and see which goes deepest.
  RxSBGOperands RxSBG[] = {
    RxSBGOperands(Opcode, N->getOperand(Num: 0)),
    RxSBGOperands(Opcode, N->getOperand(Num: 1))
  };
  unsigned Count[] = { 0, 0 };
  for (unsigned I = 0; I < 2; ++I)
    while (RxSBG[I].Input->hasOneUse() && expandRxSBG(RxSBG&: RxSBG[I]))
      // In cases of multiple users it seems better to keep the simple
      // instruction as they are one cycle faster, and it also helps in cases
      // where both inputs share a common node.
      // The widening or narrowing is expected to be free. Counting widening
      // or narrowing as a saved operation will result in preferring an R*SBG
      // over a simple shift/logical instruction.
      if (RxSBG[I].Input.getOpcode() != ISD::ANY_EXTEND &&
          RxSBG[I].Input.getOpcode() != ISD::TRUNCATE)
        Count[I] += 1;

  // Do nothing if neither operand is suitable.
  if (Count[0] == 0 && Count[1] == 0)
    return false;

  // Pick the deepest second operand.
  unsigned I = Count[0] > Count[1] ? 0 : 1;
  SDValue Op0 = N->getOperand(Num: I ^ 1);

  // Prefer IC for character insertions from memory.
  if (Opcode == SystemZ::ROSBG && (RxSBG[I].Mask & 0xff) == 0)
    if (auto *Load = dyn_cast<LoadSDNode>(Val: Op0.getNode()))
      if (Load->getMemoryVT() == MVT::i8)
        return false;

  // See whether we can avoid an AND in the first operand by converting
  // ROSBG to RISBG.
  if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op&: Op0, InsertMask: RxSBG[I].Mask)) {
    Opcode = SystemZ::RISBG;
    // Prefer RISBGN if available, since it does not clobber CC.
    if (Subtarget->hasMiscellaneousExtensions())
      Opcode = SystemZ::RISBGN;
  }

  // Operands: insert-into value, rotated source, selected bit range
  // [Start, End], rotate amount.
  SDValue Ops[5] = {
    convertTo(DL, MVT::i64, Op0),
    convertTo(DL, MVT::i64, RxSBG[I].Input),
    CurDAG->getTargetConstant(RxSBG[I].Start, DL, MVT::i32),
    CurDAG->getTargetConstant(RxSBG[I].End, DL, MVT::i32),
    CurDAG->getTargetConstant(RxSBG[I].Rotate, DL, MVT::i32)
  };
  SDValue New = convertTo(
      DL, VT, SDValue(CurDAG->getMachineNode(Opcode, DL, MVT::i64, Ops), 0));
  ReplaceNode(F: N, T: New.getNode());
  return true;
}
1129 | |
// Replace Node with a two-step sequence that combines UpperVal and
// LowerVal using Opcode, materializing an immediate too large for a
// single instruction.  If Op0 is non-null, it is first combined with
// UpperVal using the same Opcode.
void SystemZDAGToDAGISel::splitLargeImmediate(unsigned Opcode, SDNode *Node,
                                              SDValue Op0, uint64_t UpperVal,
                                              uint64_t LowerVal) {
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);
  // Build the upper half, applying Opcode to Op0 when one was supplied.
  SDValue Upper = CurDAG->getConstant(Val: UpperVal, DL, VT);
  if (Op0.getNode())
    Upper = CurDAG->getNode(Opcode, DL, VT, N1: Op0, N2: Upper);

  {
    // When we haven't passed in Op0, Upper will be a constant. In order to
    // prevent folding back to the large immediate in `Or = getNode(...)` we run
    // SelectCode first and end up with an opaque machine node. This means that
    // we need to use a handle to keep track of Upper in case it gets CSE'd by
    // SelectCode.
    //
    // Note that in the case where Op0 is passed in we could just call
    // SelectCode(Upper) later, along with the SelectCode(Or), and avoid needing
    // the handle at all, but it's fine to do it here.
    //
    // TODO: This is a pretty hacky way to do this. Can we do something that
    // doesn't require a two paragraph explanation?
    HandleSDNode Handle(Upper);
    SelectCode(Upper.getNode());
    Upper = Handle.getValue();
  }

  // Combine the already-selected upper part with the lower constant and
  // select the result in Node's place.
  SDValue Lower = CurDAG->getConstant(Val: LowerVal, DL, VT);
  SDValue Or = CurDAG->getNode(Opcode, DL, VT, N1: Upper, N2: Lower);

  ReplaceNode(F: Node, T: Or.getNode());

  SelectCode(Or.getNode());
}
1164 | |
// Materialize the vector constant described by VCI (a BYTE_MASK,
// REPLICATE or ROTATE_MASK generator) and replace Node with the result,
// adjusting for Node's actual type: direct replacement when the types
// match, a bitcast for other 128-bit types, or a subregister extraction
// for scalar float/double results.
void SystemZDAGToDAGISel::loadVectorConstant(
    const SystemZVectorConstantInfo &VCI, SDNode *Node) {
  assert((VCI.Opcode == SystemZISD::BYTE_MASK ||
          VCI.Opcode == SystemZISD::REPLICATE ||
          VCI.Opcode == SystemZISD::ROTATE_MASK) &&
         "Bad opcode!" );
  assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type" );
  EVT VT = Node->getValueType(ResNo: 0);
  SDLoc DL(Node);
  // Build the generating node from the immediate operands recorded in VCI.
  SmallVector<SDValue, 2> Ops;
  for (unsigned OpVal : VCI.OpVals)
    Ops.push_back(CurDAG->getTargetConstant(OpVal, DL, MVT::i32));
  SDValue Op = CurDAG->getNode(Opcode: VCI.Opcode, DL, VT: VCI.VecVT, Ops);

  if (VCI.VecVT == VT.getSimpleVT())
    ReplaceNode(F: Node, T: Op.getNode());
  else if (VT.getSizeInBits() == 128) {
    // Same width, different type: insert a bitcast and select it as well.
    SDValue BitCast = CurDAG->getNode(Opcode: ISD::BITCAST, DL, VT, Operand: Op);
    ReplaceNode(F: Node, T: BitCast.getNode());
    SelectCode(BitCast.getNode());
  } else { // float or double
    // Scalar FP result: extract the high part of the vector register.
    unsigned SubRegIdx =
      (VT.getSizeInBits() == 32 ? SystemZ::subreg_h32 : SystemZ::subreg_h64);
    ReplaceNode(
      F: Node, T: CurDAG->getTargetExtractSubreg(SRIdx: SubRegIdx, DL, VT, Operand: Op).getNode());
  }
  // Finally select the machine form of the generating node itself.
  SelectCode(Op.getNode());
}
1193 | |
// Load the 128-bit constant Val from the constant pool: a LARL forms the
// address of the pool entry and a VL loads the 16 bytes.  Returns the VL
// machine node (value 0 = loaded data, value 1 = chain).
SDNode *SystemZDAGToDAGISel::loadPoolVectorConstant(APInt Val, EVT VT, SDLoc DL) {
  SDNode *ResNode;
  assert (VT.getSizeInBits() == 128);

  // Place the value in the constant pool as an i128 constant.
  SDValue CP = CurDAG->getTargetConstantPool(
      C: ConstantInt::get(Ty: Type::getInt128Ty(C&: *CurDAG->getContext()), V: Val),
      VT: TLI->getPointerTy(DL: CurDAG->getDataLayout()));

  EVT PtrVT = CP.getValueType();
  // VL operands: base (LARL of the pool entry), displacement 0, no index
  // register, chain.
  SDValue Ops[] = {
    SDValue(CurDAG->getMachineNode(SystemZ::LARL, DL, PtrVT, CP), 0),
    CurDAG->getTargetConstant(0, DL, PtrVT),
    CurDAG->getRegister(0, PtrVT),
    CurDAG->getEntryNode()
  };
  ResNode = CurDAG->getMachineNode(SystemZ::VL, DL, VT, MVT::Other, Ops);

  // Annotate ResNode with memory operand information so that MachineInstr
  // queries work properly. This e.g. gives the register allocation the
  // required information for rematerialization.
  MachineFunction& MF = CurDAG->getMachineFunction();
  MachineMemOperand *MemOp =
      MF.getMachineMemOperand(PtrInfo: MachinePointerInfo::getConstantPool(MF),
                              F: MachineMemOperand::MOLoad, Size: 16, BaseAlignment: Align(8));

  CurDAG->setNodeMemRefs(N: cast<MachineSDNode>(Val: ResNode), NewMemRefs: {MemOp});
  return ResNode;
}
1222 | |
// Try to replace N -- an insertion of a loaded scalar into a vector
// element (operand 0: vector, operand 1: load, operand 2: element
// index) -- with a gather-element machine instruction of the given
// Opcode.  Returns true on success.
bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
  SDValue ElemV = N->getOperand(Num: 2);
  // The element index must be a constant within the vector's range.
  auto *ElemN = dyn_cast<ConstantSDNode>(Val&: ElemV);
  if (!ElemN)
    return false;

  unsigned Elem = ElemN->getZExtValue();
  EVT VT = N->getValueType(ResNo: 0);
  if (Elem >= VT.getVectorNumElements())
    return false;

  // The inserted value must be a load whose data result (value 0) has no
  // other users, and the load must not extend (memory and result widths
  // must match).
  auto *Load = dyn_cast<LoadSDNode>(Val: N->getOperand(Num: 1));
  if (!Load || !Load->hasNUsesOfValue(NUses: 1, Value: 0))
    return false;
  if (Load->getMemoryVT().getSizeInBits() !=
      Load->getValueType(ResNo: 0).getSizeInBits())
    return false;

  // The address must decompose as base + 12-bit displacement + vector
  // index, with the index taken from the same element of an integer
  // vector of matching type.
  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Addr: Load->getBasePtr(), Elem: ElemV, Base, Disp, Index) ||
      Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return false;

  SDLoc DL(Load);
  SDValue Ops[] = {
    N->getOperand(0), Base, Disp, Index,
    CurDAG->getTargetConstant(Elem, DL, MVT::i32), Load->getChain()
  };
  SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
  // Rewire the load's chain users to the new node's chain, then replace N.
  ReplaceUses(F: SDValue(Load, 1), T: SDValue(Res, 1));
  ReplaceNode(F: N, T: Res);
  return true;
}
1256 | |
// Try to replace Store -- whose stored value is an element extracted from
// a vector -- with a scatter-element machine instruction of the given
// Opcode.  Returns true on success.
bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
  SDValue Value = Store->getValue();
  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return false;
  // The store must not truncate the extracted element.
  if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits())
    return false;

  // The element index must be a constant within the vector's range.
  SDValue ElemV = Value.getOperand(i: 1);
  auto *ElemN = dyn_cast<ConstantSDNode>(Val&: ElemV);
  if (!ElemN)
    return false;

  SDValue Vec = Value.getOperand(i: 0);
  EVT VT = Vec.getValueType();
  unsigned Elem = ElemN->getZExtValue();
  if (Elem >= VT.getVectorNumElements())
    return false;

  // The address must decompose as base + 12-bit displacement + vector
  // index, with the index taken from the same element of an integer
  // vector of matching type.
  SDValue Base, Disp, Index;
  if (!selectBDVAddr12Only(Addr: Store->getBasePtr(), Elem: ElemV, Base, Disp, Index) ||
      Index.getValueType() != VT.changeVectorElementTypeToInteger())
    return false;

  SDLoc DL(Store);
  SDValue Ops[] = {
    Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, DL, MVT::i32),
    Store->getChain()
  };
  ReplaceNode(Store, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
  return true;
}
1288 | |
// Check whether or not the chain ending in StoreNode is suitable for doing
// the {load; op; store} to modify transformation.  On success, LoadNode is
// set to the matched load and InputChain to the chain the fused
// instruction should use.
static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode,
                                        SDValue StoredVal, SelectionDAG *CurDAG,
                                        LoadSDNode *&LoadNode,
                                        SDValue &InputChain) {
  // Is the stored value result 0 of the operation?
  if (StoredVal.getResNo() != 0)
    return false;

  // Are there other uses of the loaded value than the operation?
  if (!StoredVal.getNode()->hasNUsesOfValue(NUses: 1, Value: 0))
    return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(N: StoreNode) || StoreNode->isNonTemporal())
    return false;

  SDValue Load = StoredVal->getOperand(Num: 0);
  // Is the stored value a non-extending and non-indexed load?
  if (!ISD::isNormalLoad(N: Load.getNode()))
    return false;

  // Return LoadNode by reference.
  LoadNode = cast<LoadSDNode>(Val&: Load);

  // Is store the only read of the loaded value?
  if (!Load.hasOneUse())
    return false;

  // Is the address of the store the same as the load?
  if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
      LoadNode->getOffset() != StoreNode->getOffset())
    return false;

  // Check if the chain is produced by the load or is a TokenFactor with
  // the load output chain as an operand. Return InputChain by reference.
  SDValue Chain = StoreNode->getChain();

  bool ChainCheck = false;
  if (Chain == Load.getValue(R: 1)) {
    // Simple case: the store chains directly on the load.
    ChainCheck = true;
    InputChain = LoadNode->getChain();
  } else if (Chain.getOpcode() == ISD::TokenFactor) {
    SmallVector<SDValue, 4> ChainOps;
    SmallVector<const SDNode *, 4> LoopWorklist;
    SmallPtrSet<const SDNode *, 16> Visited;
    // Cap the predecessor walk below so pathological DAGs stay cheap.
    const unsigned int Max = 1024;
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
      SDValue Op = Chain.getOperand(i);
      if (Op == Load.getValue(R: 1)) {
        ChainCheck = true;
        // Drop Load, but keep its chain. No cycle check necessary.
        ChainOps.push_back(Elt: Load.getOperand(i: 0));
        continue;
      }
      LoopWorklist.push_back(Elt: Op.getNode());
      ChainOps.push_back(Elt: Op);
    }

    if (ChainCheck) {
      // Add the other operand of StoredVal to worklist.
      for (SDValue Op : StoredVal->ops())
        if (Op.getNode() != LoadNode)
          LoopWorklist.push_back(Elt: Op.getNode());

      // Check if Load is reachable from any of the nodes in the worklist.
      // A path back to the load would make the fused node cyclic.
      if (SDNode::hasPredecessorHelper(N: Load.getNode(), Visited, Worklist&: LoopWorklist, MaxSteps: Max,
                                       TopologicalPrune: true))
        return false;

      // Make a new TokenFactor with all the other input chains except
      // for the load.
      InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
                                   MVT::Other, ChainOps);
    }
  }
  if (!ChainCheck)
    return false;

  return true;
}
1371 | |
// Change a chain of {load; op; store} of the same value into a simple op
// through memory of that value, if the uses of the modified value and its
// address are suitable.
//
// The tablegen pattern memory operand pattern is currently not able to match
// the case where the CC on the original operation are used.
//
// See the equivalent routine in X86ISelDAGToDAG for further comments.
bool SystemZDAGToDAGISel::tryFoldLoadStoreIntoMemOperand(SDNode *Node) {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Val: Node);
  SDValue StoredVal = StoreNode->getOperand(Num: 1);
  unsigned Opc = StoredVal->getOpcode();
  SDLoc DL(StoreNode);

  // Before we try to select anything, make sure this is memory operand size
  // and opcode we can handle. Note that this must match the code below that
  // actually lowers the opcodes.
  EVT MemVT = StoreNode->getMemoryVT();
  unsigned NewOpc = 0;
  bool NegateOperand = false;
  switch (Opc) {
  default:
    return false;
  case SystemZISD::SSUBO:
    // A subtraction becomes an add-to-memory of the negated immediate.
    NegateOperand = true;
    [[fallthrough]];
  case SystemZISD::SADDO:
    if (MemVT == MVT::i32)
      NewOpc = SystemZ::ASI;
    else if (MemVT == MVT::i64)
      NewOpc = SystemZ::AGSI;
    else
      return false;
    break;
  case SystemZISD::USUBO:
    NegateOperand = true;
    [[fallthrough]];
  case SystemZISD::UADDO:
    if (MemVT == MVT::i32)
      NewOpc = SystemZ::ALSI;
    else if (MemVT == MVT::i64)
      NewOpc = SystemZ::ALGSI;
    else
      return false;
    break;
  }

  // Match the {load; op; store} structure and compute the input chain.
  LoadSDNode *LoadNode = nullptr;
  SDValue InputChain;
  if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadNode,
                                   InputChain))
    return false;

  // The non-memory operand must be a constant that fits in the
  // instruction's signed 8-bit immediate field (after any negation).
  SDValue Operand = StoredVal.getOperand(i: 1);
  auto *OperandC = dyn_cast<ConstantSDNode>(Val&: Operand);
  if (!OperandC)
    return false;
  auto OperandV = OperandC->getAPIntValue();
  if (NegateOperand)
    OperandV = -OperandV;
  if (OperandV.getSignificantBits() > 8)
    return false;
  Operand = CurDAG->getTargetConstant(Val: OperandV, DL, VT: MemVT);

  // The address must be expressible as base + 20-bit displacement.
  SDValue Base, Disp;
  if (!selectBDAddr20Only(Addr: StoreNode->getBasePtr(), Base, Disp))
    return false;

  SDValue Ops[] = { Base, Disp, Operand, InputChain };
  MachineSDNode *Result =
      CurDAG->getMachineNode(NewOpc, DL, MVT::i32, MVT::Other, Ops);
  // Record both original memory accesses on the fused instruction.
  CurDAG->setNodeMemRefs(
      N: Result, NewMemRefs: {StoreNode->getMemOperand(), LoadNode->getMemOperand()});

  // Result value 1 (chain) replaces the store; result value 0 replaces
  // value 1 of the arithmetic node (its CC/overflow result).
  ReplaceUses(F: SDValue(StoreNode, 0), T: SDValue(Result, 1));
  ReplaceUses(F: SDValue(StoredVal.getNode(), 1), T: SDValue(Result, 0));
  CurDAG->RemoveDeadNode(N: Node);
  return true;
}
1451 | |
1452 | bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, |
1453 | LoadSDNode *Load) const { |
1454 | // Check that the two memory operands have the same size. |
1455 | if (Load->getMemoryVT() != Store->getMemoryVT()) |
1456 | return false; |
1457 | |
1458 | // Volatility stops an access from being decomposed. |
1459 | if (Load->isVolatile() || Store->isVolatile()) |
1460 | return false; |
1461 | |
1462 | // There's no chance of overlap if the load is invariant. |
1463 | if (Load->isInvariant() && Load->isDereferenceable()) |
1464 | return true; |
1465 | |
1466 | // Otherwise we need to check whether there's an alias. |
1467 | const Value *V1 = Load->getMemOperand()->getValue(); |
1468 | const Value *V2 = Store->getMemOperand()->getValue(); |
1469 | if (!V1 || !V2) |
1470 | return false; |
1471 | |
1472 | // Reject equality. |
1473 | uint64_t Size = Load->getMemoryVT().getStoreSize(); |
1474 | int64_t End1 = Load->getSrcValueOffset() + Size; |
1475 | int64_t End2 = Store->getSrcValueOffset() + Size; |
1476 | if (V1 == V2 && End1 == End2) |
1477 | return false; |
1478 | |
1479 | return AA->isNoAlias(LocA: MemoryLocation(V1, End1, Load->getAAInfo()), |
1480 | LocB: MemoryLocation(V2, End2, Store->getAAInfo())); |
1481 | } |
1482 | |
1483 | bool SystemZDAGToDAGISel::storeLoadCanUseMVC(SDNode *N) const { |
1484 | auto *Store = cast<StoreSDNode>(Val: N); |
1485 | auto *Load = cast<LoadSDNode>(Val: Store->getValue()); |
1486 | |
1487 | // Prefer not to use MVC if either address can use ... RELATIVE LONG |
1488 | // instructions. |
1489 | uint64_t Size = Load->getMemoryVT().getStoreSize(); |
1490 | if (Size > 1 && Size <= 8) { |
1491 | // Prefer LHRL, LRL and LGRL. |
1492 | if (SystemZISD::isPCREL(Opcode: Load->getBasePtr().getOpcode())) |
1493 | return false; |
1494 | // Prefer STHRL, STRL and STGRL. |
1495 | if (SystemZISD::isPCREL(Opcode: Store->getBasePtr().getOpcode())) |
1496 | return false; |
1497 | } |
1498 | |
1499 | return canUseBlockOperation(Store, Load); |
1500 | } |
1501 | |
1502 | bool SystemZDAGToDAGISel::storeLoadCanUseBlockBinary(SDNode *N, |
1503 | unsigned I) const { |
1504 | auto *StoreA = cast<StoreSDNode>(Val: N); |
1505 | auto *LoadA = cast<LoadSDNode>(Val: StoreA->getValue().getOperand(i: 1 - I)); |
1506 | auto *LoadB = cast<LoadSDNode>(Val: StoreA->getValue().getOperand(i: I)); |
1507 | return !LoadA->isVolatile() && LoadA->getMemoryVT() == LoadB->getMemoryVT() && |
1508 | canUseBlockOperation(Store: StoreA, Load: LoadB); |
1509 | } |
1510 | |
// Return true if the memory access N is known to be aligned to its access
// size (and has no index register), making it suitable for instructions
// that require aligned operands.
bool SystemZDAGToDAGISel::storeLoadIsAligned(SDNode *N) const {

  auto *MemAccess = cast<MemSDNode>(Val: N);
  auto *LdSt = dyn_cast<LSBaseSDNode>(Val: MemAccess);
  TypeSize StoreSize = MemAccess->getMemoryVT().getStoreSize();
  SDValue BasePtr = MemAccess->getBasePtr();
  MachineMemOperand *MMO = MemAccess->getMemOperand();
  assert(MMO && "Expected a memory operand." );

  // The memory access must have a proper alignment and no index register.
  // Only load and store nodes have the offset operand (atomic loads do not).
  if (MemAccess->getAlign().value() < StoreSize ||
      (LdSt && !LdSt->getOffset().isUndef()))
    return false;

  // The MMO must not have an unaligned offset.
  if (MMO->getOffset() % StoreSize != 0)
    return false;

  // An access to GOT or the Constant Pool is aligned.
  if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
    if ((PSV->isGOT() || PSV->isConstantPool()))
      return true;

  // Check the alignment of a Global Address.
  if (BasePtr.getNumOperands())
    if (GlobalAddressSDNode *GA =
            dyn_cast<GlobalAddressSDNode>(Val: BasePtr.getOperand(i: 0))) {
      // The immediate offset must be aligned.
      if (GA->getOffset() % StoreSize != 0)
        return false;

      // The alignment of the symbol itself must be at least the store size.
      const GlobalValue *GV = GA->getGlobal();
      const DataLayout &DL = GV->getParent()->getDataLayout();
      if (GV->getPointerAlignment(DL).value() < StoreSize)
        return false;
    }

  return true;
}
1552 | |
1553 | ISD::LoadExtType SystemZDAGToDAGISel::getLoadExtType(SDNode *N) const { |
1554 | ISD::LoadExtType ETy; |
1555 | if (auto *L = dyn_cast<LoadSDNode>(Val: N)) |
1556 | ETy = L->getExtensionType(); |
1557 | else if (auto *AL = dyn_cast<AtomicSDNode>(Val: N)) |
1558 | ETy = AL->getExtensionType(); |
1559 | else |
1560 | llvm_unreachable("Unkown load node type." ); |
1561 | return ETy; |
1562 | } |
1563 | |
// Main instruction-selection entry point.  Handles the opcodes that need
// custom SystemZ logic and falls back to the TableGen-generated matcher
// (SelectCode) for everything else.
void SystemZDAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== " ; Node->dump(CurDAG); errs() << "\n" );
    Node->setNodeId(-1);
    return;
  }

  unsigned Opcode = Node->getOpcode();
  switch (Opcode) {
  case ISD::OR:
    // Try a rotate-then-OR-selected-bits instruction first.
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::ROSBG))
        return;
    goto or_xor;

  case ISD::XOR:
    // Try a rotate-then-XOR-selected-bits instruction first.
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::RXSBG))
        return;
    // Fall through.
  or_xor:
    // If this is a 64-bit operation in which both 32-bit halves are nonzero,
    // split the operation into two. If both operands here happen to be
    // constant, leave this to common code to optimize.
    if (Node->getValueType(0) == MVT::i64 &&
        Node->getOperand(0).getOpcode() != ISD::Constant)
      if (auto *Op1 = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1))) {
        uint64_t Val = Op1->getZExtValue();
        // Don't split the operation if we can match one of the combined
        // logical operations provided by miscellaneous-extensions-3.
        if (Subtarget->hasMiscellaneousExtensions3()) {
          unsigned ChildOpcode = Node->getOperand(Num: 0).getOpcode();
          // Check whether this expression matches NAND/NOR/NXOR.
          if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
            if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
                ChildOpcode == ISD::XOR)
              break;
          // Check whether this expression matches OR-with-complement
          // (or matches an alternate pattern for NXOR).
          if (ChildOpcode == ISD::XOR) {
            auto Op0 = Node->getOperand(Num: 0);
            if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Val: Op0->getOperand(Num: 1)))
              if (Op0Op1->getZExtValue() == (uint64_t)-1)
                break;
          }
        }
        // Don't split an XOR with -1 as LCGR/AGHI is more compact.
        if (Opcode == ISD::XOR && Op1->isAllOnes())
          break;
        if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
          // Neither 32-bit half is zero: materialize them separately.
          splitLargeImmediate(Opcode, Node, Op0: Node->getOperand(Num: 0),
                              UpperVal: Val - uint32_t(Val), LowerVal: uint32_t(Val));
          return;
        }
      }
    break;

  case ISD::AND:
    // Try a rotate-then-AND-selected-bits instruction first.
    if (Node->getOperand(Num: 1).getOpcode() != ISD::Constant)
      if (tryRxSBG(Node, SystemZ::RNSBG))
        return;
    [[fallthrough]];
  case ISD::ROTL:
  case ISD::SHL:
  case ISD::SRL:
  case ISD::ZERO_EXTEND:
    // These can often be implemented as a single rotate-then-insert with
    // zeroing (RISBG-style) instruction.
    if (tryRISBGZero(N: Node))
      return;
    break;

  case ISD::BSWAP:
    // Byte-swap an i128 by permuting the bytes of its v16i8 bitcast with
    // VPERM; the permutation mask is loaded from the constant pool.
    if (Node->getValueType(0) == MVT::i128) {
      SDLoc DL(Node);
      SDValue Src = Node->getOperand(Num: 0);
      Src = CurDAG->getNode(ISD::BITCAST, DL, MVT::v16i8, Src);

      uint64_t Bytes[2] = { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL };
      SDNode *Mask = loadPoolVectorConstant(APInt(128, Bytes), MVT::v16i8, DL);
      SDValue Ops[] = { Src, Src, SDValue(Mask, 0) };
      SDValue Res = SDValue(CurDAG->getMachineNode(SystemZ::VPERM, DL,
                                                   MVT::v16i8, Ops), 0);

      Res = CurDAG->getNode(ISD::BITCAST, DL, MVT::i128, Res);
      SDNode *ResNode = Res.getNode();
      ReplaceNode(F: Node, T: ResNode);
      // Also run selection over the newly created bitcast nodes.
      SelectCode(Src.getNode());
      SelectCode(ResNode);
      return;
    }
    break;

  case ISD::Constant:
    // If this is a 64-bit constant that is out of the range of LLILF,
    // LLIHF and LGFI, split it into two 32-bit pieces.
    if (Node->getValueType(0) == MVT::i64) {
      uint64_t Val = Node->getAsZExtVal();
      if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val) && !isInt<32>(x: Val)) {
        splitLargeImmediate(Opcode: ISD::OR, Node, Op0: SDValue(), UpperVal: Val - uint32_t(Val),
                            LowerVal: uint32_t(Val));
        return;
      }
    }
    if (Node->getValueType(0) == MVT::i128) {
      const APInt &Val = Node->getAsAPIntVal();
      SystemZVectorConstantInfo VCI(Val);
      if (VCI.isVectorConstantLegal(Subtarget: *Subtarget)) {
        loadVectorConstant(VCI, Node);
        return;
      }
      // If we can't materialize the constant we need to use a literal pool.
      SDNode *ResNode = loadPoolVectorConstant(Val, MVT::i128, SDLoc(Node));
      ReplaceNode(F: Node, T: ResNode);
      return;
    }
    break;

  case SystemZISD::SELECT_CCMASK: {
    SDValue Op0 = Node->getOperand(Num: 0);
    SDValue Op1 = Node->getOperand(Num: 1);
    // Prefer to put any load first, so that it can be matched as a
    // conditional load. Likewise for constants in range for LOCHI.
    if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
        (Subtarget->hasLoadStoreOnCond2() &&
         Node->getValueType(ResNo: 0).isInteger() &&
         Node->getValueType(ResNo: 0).getSizeInBits() <= 64 &&
         Op1.getOpcode() == ISD::Constant &&
         isInt<16>(x: cast<ConstantSDNode>(Val&: Op1)->getSExtValue()) &&
         !(Op0.getOpcode() == ISD::Constant &&
           isInt<16>(x: cast<ConstantSDNode>(Val&: Op0)->getSExtValue())))) {
      SDValue CCValid = Node->getOperand(Num: 2);
      SDValue CCMask = Node->getOperand(Num: 3);
      uint64_t ConstCCValid = CCValid.getNode()->getAsZExtVal();
      uint64_t ConstCCMask = CCMask.getNode()->getAsZExtVal();
      // Invert the condition.  Swapping the select operands requires
      // flipping the CC mask within the valid bits (CCValid ^ CCMask).
      CCMask = CurDAG->getTargetConstant(Val: ConstCCValid ^ ConstCCMask,
                                         DL: SDLoc(Node), VT: CCMask.getValueType());
      SDValue Op4 = Node->getOperand(Num: 4);
      SDNode *UpdatedNode =
          CurDAG->UpdateNodeOperands(N: Node, Op1, Op2: Op0, Op3: CCValid, Op4: CCMask, Op5: Op4);
      if (UpdatedNode != Node) {
        // In case this node already exists then replace Node with it.
        ReplaceNode(F: Node, T: UpdatedNode);
        Node = UpdatedNode;
      }
    }
    break;
  }

  case ISD::INSERT_VECTOR_ELT: {
    // Try to use a vector gather element instruction (VGEF/VGEG) for
    // 32-/64-bit element insertions.
    EVT VT = Node->getValueType(ResNo: 0);
    unsigned ElemBitSize = VT.getScalarSizeInBits();
    if (ElemBitSize == 32) {
      if (tryGather(Node, SystemZ::VGEF))
        return;
    } else if (ElemBitSize == 64) {
      if (tryGather(Node, SystemZ::VGEG))
        return;
    }
    break;
  }

  case ISD::BUILD_VECTOR: {
    // Materialize legal vector constants directly rather than leaving
    // them to the generic matcher.
    auto *BVN = cast<BuildVectorSDNode>(Val: Node);
    SystemZVectorConstantInfo VCI(BVN);
    if (VCI.isVectorConstantLegal(Subtarget: *Subtarget)) {
      loadVectorConstant(VCI, Node);
      return;
    }
    break;
  }

  case ISD::ConstantFP: {
    // Non-zero FP immediates are materialized via loadVectorConstant; the
    // assert documents that only legal immediates are expected here.
    APFloat Imm = cast<ConstantFPSDNode>(Val: Node)->getValueAPF();
    if (Imm.isZero() || Imm.isNegZero())
      break;
    SystemZVectorConstantInfo VCI(Imm);
    bool Success = VCI.isVectorConstantLegal(Subtarget: *Subtarget); (void)Success;
    assert(Success && "Expected legal FP immediate" );
    loadVectorConstant(VCI, Node);
    return;
  }

  case ISD::STORE: {
    if (tryFoldLoadStoreIntoMemOperand(Node))
      return;
    // Try to use a vector scatter element instruction (VSCEF/VSCEG).
    auto *Store = cast<StoreSDNode>(Val: Node);
    unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
    if (ElemBitSize == 32) {
      if (tryScatter(Store, SystemZ::VSCEF))
        return;
    } else if (ElemBitSize == 64) {
      if (tryScatter(Store, SystemZ::VSCEG))
        return;
    }
    break;
  }

  case ISD::ATOMIC_STORE: {
    auto *AtomOp = cast<AtomicSDNode>(Val: Node);
    // Replace the atomic_store with a regular store and select it. This is
    // ok since we know all store instructions <= 8 bytes are atomic, and the
    // 16 byte case is already handled during lowering.
    StoreSDNode *St = cast<StoreSDNode>(Val: CurDAG->getTruncStore(
        Chain: AtomOp->getChain(), dl: SDLoc(AtomOp), Val: AtomOp->getVal(),
        Ptr: AtomOp->getBasePtr(), SVT: AtomOp->getMemoryVT(), MMO: AtomOp->getMemOperand()));
    assert(St->getMemOperand()->isAtomic() && "Broken MMO." );
    SDNode *Chain = St;
    // We have to enforce sequential consistency by performing a
    // serialization operation after the store.
    if (AtomOp->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
      Chain = CurDAG->getMachineNode(SystemZ::Serialize, SDLoc(AtomOp),
                                     MVT::Other, SDValue(Chain, 0));
    ReplaceNode(F: Node, T: Chain);
    SelectCode(St);
    return;
  }
  }

  // Let the TableGen-generated matcher handle everything else.
  SelectCode(Node);
}
1785 | |
1786 | bool SystemZDAGToDAGISel::SelectInlineAsmMemoryOperand( |
1787 | const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, |
1788 | std::vector<SDValue> &OutOps) { |
1789 | SystemZAddressingMode::AddrForm Form; |
1790 | SystemZAddressingMode::DispRange DispRange; |
1791 | SDValue Base, Disp, Index; |
1792 | |
1793 | switch(ConstraintID) { |
1794 | default: |
1795 | llvm_unreachable("Unexpected asm memory constraint" ); |
1796 | case InlineAsm::ConstraintCode::i: |
1797 | case InlineAsm::ConstraintCode::Q: |
1798 | case InlineAsm::ConstraintCode::ZQ: |
1799 | // Accept an address with a short displacement, but no index. |
1800 | Form = SystemZAddressingMode::FormBD; |
1801 | DispRange = SystemZAddressingMode::Disp12Only; |
1802 | break; |
1803 | case InlineAsm::ConstraintCode::R: |
1804 | case InlineAsm::ConstraintCode::ZR: |
1805 | // Accept an address with a short displacement and an index. |
1806 | Form = SystemZAddressingMode::FormBDXNormal; |
1807 | DispRange = SystemZAddressingMode::Disp12Only; |
1808 | break; |
1809 | case InlineAsm::ConstraintCode::S: |
1810 | case InlineAsm::ConstraintCode::ZS: |
1811 | // Accept an address with a long displacement, but no index. |
1812 | Form = SystemZAddressingMode::FormBD; |
1813 | DispRange = SystemZAddressingMode::Disp20Only; |
1814 | break; |
1815 | case InlineAsm::ConstraintCode::T: |
1816 | case InlineAsm::ConstraintCode::m: |
1817 | case InlineAsm::ConstraintCode::o: |
1818 | case InlineAsm::ConstraintCode::p: |
1819 | case InlineAsm::ConstraintCode::ZT: |
1820 | // Accept an address with a long displacement and an index. |
1821 | // m works the same as T, as this is the most general case. |
1822 | // We don't really have any special handling of "offsettable" |
1823 | // memory addresses, so just treat o the same as m. |
1824 | Form = SystemZAddressingMode::FormBDXNormal; |
1825 | DispRange = SystemZAddressingMode::Disp20Only; |
1826 | break; |
1827 | } |
1828 | |
1829 | if (selectBDXAddr(Form, DR: DispRange, Addr: Op, Base, Disp, Index)) { |
1830 | const TargetRegisterClass *TRC = |
1831 | Subtarget->getRegisterInfo()->getPointerRegClass(MF: *MF); |
1832 | SDLoc DL(Base); |
1833 | SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32); |
1834 | |
1835 | // Make sure that the base address doesn't go into %r0. |
1836 | // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything. |
1837 | if (Base.getOpcode() != ISD::TargetFrameIndex && |
1838 | Base.getOpcode() != ISD::Register) { |
1839 | Base = |
1840 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
1841 | dl: DL, VT: Base.getValueType(), |
1842 | Op1: Base, Op2: RC), 0); |
1843 | } |
1844 | |
1845 | // Make sure that the index register isn't assigned to %r0 either. |
1846 | if (Index.getOpcode() != ISD::Register) { |
1847 | Index = |
1848 | SDValue(CurDAG->getMachineNode(Opcode: TargetOpcode::COPY_TO_REGCLASS, |
1849 | dl: DL, VT: Index.getValueType(), |
1850 | Op1: Index, Op2: RC), 0); |
1851 | } |
1852 | |
1853 | OutOps.push_back(x: Base); |
1854 | OutOps.push_back(x: Disp); |
1855 | OutOps.push_back(x: Index); |
1856 | return false; |
1857 | } |
1858 | |
1859 | return true; |
1860 | } |
1861 | |
1862 | // IsProfitableToFold - Returns true if is profitable to fold the specific |
1863 | // operand node N of U during instruction selection that starts at Root. |
1864 | bool |
1865 | SystemZDAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, |
1866 | SDNode *Root) const { |
1867 | // We want to avoid folding a LOAD into an ICMP node if as a result |
1868 | // we would be forced to spill the condition code into a GPR. |
1869 | if (N.getOpcode() == ISD::LOAD && U->getOpcode() == SystemZISD::ICMP) { |
1870 | if (!N.hasOneUse() || !U->hasOneUse()) |
1871 | return false; |
1872 | |
1873 | // The user of the CC value will usually be a CopyToReg into the |
1874 | // physical CC register, which in turn is glued and chained to the |
1875 | // actual instruction that uses the CC value. Bail out if we have |
1876 | // anything else than that. |
1877 | SDNode *CCUser = *U->use_begin(); |
1878 | SDNode *CCRegUser = nullptr; |
1879 | if (CCUser->getOpcode() == ISD::CopyToReg || |
1880 | cast<RegisterSDNode>(CCUser->getOperand(1))->getReg() == SystemZ::CC) { |
1881 | for (auto *U : CCUser->uses()) { |
1882 | if (CCRegUser == nullptr) |
1883 | CCRegUser = U; |
1884 | else if (CCRegUser != U) |
1885 | return false; |
1886 | } |
1887 | } |
1888 | if (CCRegUser == nullptr) |
1889 | return false; |
1890 | |
1891 | // If the actual instruction is a branch, the only thing that remains to be |
1892 | // checked is whether the CCUser chain is a predecessor of the load. |
1893 | if (CCRegUser->isMachineOpcode() && |
1894 | CCRegUser->getMachineOpcode() == SystemZ::BRC) |
1895 | return !N->isPredecessorOf(N: CCUser->getOperand(Num: 0).getNode()); |
1896 | |
1897 | // Otherwise, the instruction may have multiple operands, and we need to |
1898 | // verify that none of them are a predecessor of the load. This is exactly |
1899 | // the same check that would be done by common code if the CC setter were |
1900 | // glued to the CC user, so simply invoke that check here. |
1901 | if (!IsLegalToFold(N, U, Root: CCRegUser, OptLevel, IgnoreChains: false)) |
1902 | return false; |
1903 | } |
1904 | |
1905 | return true; |
1906 | } |
1907 | |
namespace {
// Represents a sequence for extracting a 0/1 value from an IPM result:
//   (((X ^ XORValue) + AddValue) >> Bit)
struct IPMConversion {
  // Take the XOR value as int64_t so it matches the member type: callers
  // passing -1 now store -1 instead of the zero-extended 32-bit value
  // (behavior is unchanged after truncation to i32 at the use site).
  IPMConversion(int64_t xorValue, int64_t addValue, unsigned bit)
      : XORValue(xorValue), AddValue(addValue), Bit(bit) {}

  int64_t XORValue; // value XORed into the IPM result (0 if unused)
  int64_t AddValue; // value added after the XOR (0 if unused)
  unsigned Bit;     // bit number holding the result after the shift
};
} // end anonymous namespace
1920 | |
1921 | // Return a sequence for getting a 1 from an IPM result when CC has a |
1922 | // value in CCMask and a 0 when CC has a value in CCValid & ~CCMask. |
1923 | // The handling of CC values outside CCValid doesn't matter. |
1924 | static IPMConversion getIPMConversion(unsigned CCValid, unsigned CCMask) { |
1925 | // Deal with cases where the result can be taken directly from a bit |
1926 | // of the IPM result. |
1927 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_3))) |
1928 | return IPMConversion(0, 0, SystemZ::IPM_CC); |
1929 | if (CCMask == (CCValid & (SystemZ::CCMASK_2 | SystemZ::CCMASK_3))) |
1930 | return IPMConversion(0, 0, SystemZ::IPM_CC + 1); |
1931 | |
1932 | // Deal with cases where we can add a value to force the sign bit |
1933 | // to contain the right value. Putting the bit in 31 means we can |
1934 | // use SRL rather than RISBG(L), and also makes it easier to get a |
1935 | // 0/-1 value, so it has priority over the other tests below. |
1936 | // |
1937 | // These sequences rely on the fact that the upper two bits of the |
1938 | // IPM result are zero. |
1939 | uint64_t TopBit = uint64_t(1) << 31; |
1940 | if (CCMask == (CCValid & SystemZ::CCMASK_0)) |
1941 | return IPMConversion(0, -(1 << SystemZ::IPM_CC), 31); |
1942 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_1))) |
1943 | return IPMConversion(0, -(2 << SystemZ::IPM_CC), 31); |
1944 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1945 | | SystemZ::CCMASK_1 |
1946 | | SystemZ::CCMASK_2))) |
1947 | return IPMConversion(0, -(3 << SystemZ::IPM_CC), 31); |
1948 | if (CCMask == (CCValid & SystemZ::CCMASK_3)) |
1949 | return IPMConversion(0, TopBit - (3 << SystemZ::IPM_CC), 31); |
1950 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 |
1951 | | SystemZ::CCMASK_2 |
1952 | | SystemZ::CCMASK_3))) |
1953 | return IPMConversion(0, TopBit - (1 << SystemZ::IPM_CC), 31); |
1954 | |
1955 | // Next try inverting the value and testing a bit. 0/1 could be |
1956 | // handled this way too, but we dealt with that case above. |
1957 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_2))) |
1958 | return IPMConversion(-1, 0, SystemZ::IPM_CC); |
1959 | |
1960 | // Handle cases where adding a value forces a non-sign bit to contain |
1961 | // the right value. |
1962 | if (CCMask == (CCValid & (SystemZ::CCMASK_1 | SystemZ::CCMASK_2))) |
1963 | return IPMConversion(0, 1 << SystemZ::IPM_CC, SystemZ::IPM_CC + 1); |
1964 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 | SystemZ::CCMASK_3))) |
1965 | return IPMConversion(0, -(1 << SystemZ::IPM_CC), SystemZ::IPM_CC + 1); |
1966 | |
1967 | // The remaining cases are 1, 2, 0/1/3 and 0/2/3. All these are |
1968 | // can be done by inverting the low CC bit and applying one of the |
1969 | // sign-based extractions above. |
1970 | if (CCMask == (CCValid & SystemZ::CCMASK_1)) |
1971 | return IPMConversion(1 << SystemZ::IPM_CC, -(1 << SystemZ::IPM_CC), 31); |
1972 | if (CCMask == (CCValid & SystemZ::CCMASK_2)) |
1973 | return IPMConversion(1 << SystemZ::IPM_CC, |
1974 | TopBit - (3 << SystemZ::IPM_CC), 31); |
1975 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1976 | | SystemZ::CCMASK_1 |
1977 | | SystemZ::CCMASK_3))) |
1978 | return IPMConversion(1 << SystemZ::IPM_CC, -(3 << SystemZ::IPM_CC), 31); |
1979 | if (CCMask == (CCValid & (SystemZ::CCMASK_0 |
1980 | | SystemZ::CCMASK_2 |
1981 | | SystemZ::CCMASK_3))) |
1982 | return IPMConversion(1 << SystemZ::IPM_CC, |
1983 | TopBit - (1 << SystemZ::IPM_CC), 31); |
1984 | |
1985 | llvm_unreachable("Unexpected CC combination" ); |
1986 | } |
1987 | |
// Expand a SELECT_CCMASK whose operands are the constants (1, 0) or
// (-1, 0) into an explicit IPM-based sequence that computes the boolean
// directly from CC.  Returns an empty SDValue if the node does not match
// that pattern.
SDValue SystemZDAGToDAGISel::expandSelectBoolean(SDNode *Node) {
  auto *TrueOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 0));
  auto *FalseOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 1));
  if (!TrueOp || !FalseOp)
    return SDValue();
  // Only a false value of 0 and a true value of 1 or -1 are handled.
  if (FalseOp->getZExtValue() != 0)
    return SDValue();
  if (TrueOp->getSExtValue() != 1 && TrueOp->getSExtValue() != -1)
    return SDValue();

  // Operands 2 and 3 are the CC-valid and CC-mask constants.
  auto *CCValidOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 2));
  auto *CCMaskOp = dyn_cast<ConstantSDNode>(Val: Node->getOperand(Num: 3));
  if (!CCValidOp || !CCMaskOp)
    return SDValue();
  int CCValid = CCValidOp->getZExtValue();
  int CCMask = CCMaskOp->getZExtValue();

  SDLoc DL(Node);
  SDValue CCReg = Node->getOperand(Num: 4);
  IPMConversion IPM = getIPMConversion(CCValid, CCMask);
  // Materialize CC into a GPR with IPM, then massage it into 0/1 or 0/-1
  // using the XOR/ADD/shift recipe chosen by getIPMConversion.
  SDValue Result = CurDAG->getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);

  if (IPM.XORValue)
    Result = CurDAG->getNode(ISD::XOR, DL, MVT::i32, Result,
                             CurDAG->getConstant(IPM.XORValue, DL, MVT::i32));

  if (IPM.AddValue)
    Result = CurDAG->getNode(ISD::ADD, DL, MVT::i32, Result,
                             CurDAG->getConstant(IPM.AddValue, DL, MVT::i32));

  EVT VT = Node->getValueType(ResNo: 0);
  if (VT == MVT::i32 && IPM.Bit == 31) {
    // The answer is already in the sign bit: a single logical shift
    // yields 0/1, an arithmetic shift yields 0/-1.
    unsigned ShiftOp = TrueOp->getSExtValue() == 1 ? ISD::SRL : ISD::SRA;
    Result = CurDAG->getNode(ShiftOp, DL, MVT::i32, Result,
                             CurDAG->getConstant(IPM.Bit, DL, MVT::i32));
  } else {
    if (VT != MVT::i32)
      Result = CurDAG->getNode(Opcode: ISD::ANY_EXTEND, DL, VT, Operand: Result);

    if (TrueOp->getSExtValue() == 1) {
      // The SHR/AND sequence should get optimized to an RISBG.
      Result = CurDAG->getNode(ISD::SRL, DL, VT, Result,
                               CurDAG->getConstant(IPM.Bit, DL, MVT::i32));
      Result = CurDAG->getNode(Opcode: ISD::AND, DL, VT, N1: Result,
                               N2: CurDAG->getConstant(Val: 1, DL, VT));
    } else {
      // Sign-extend from IPM.Bit using a pair of shifts.
      int ShlAmt = VT.getSizeInBits() - 1 - IPM.Bit;
      int SraAmt = VT.getSizeInBits() - 1;
      Result = CurDAG->getNode(ISD::SHL, DL, VT, Result,
                               CurDAG->getConstant(ShlAmt, DL, MVT::i32));
      Result = CurDAG->getNode(ISD::SRA, DL, VT, Result,
                               CurDAG->getConstant(SraAmt, DL, MVT::i32));
    }
  }

  return Result;
}
2046 | |
2047 | void SystemZDAGToDAGISel::PreprocessISelDAG() { |
2048 | // If we have conditional immediate loads, we always prefer |
2049 | // using those over an IPM sequence. |
2050 | if (Subtarget->hasLoadStoreOnCond2()) |
2051 | return; |
2052 | |
2053 | bool MadeChange = false; |
2054 | |
2055 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), |
2056 | E = CurDAG->allnodes_end(); |
2057 | I != E;) { |
2058 | SDNode *N = &*I++; |
2059 | if (N->use_empty()) |
2060 | continue; |
2061 | |
2062 | SDValue Res; |
2063 | switch (N->getOpcode()) { |
2064 | default: break; |
2065 | case SystemZISD::SELECT_CCMASK: |
2066 | Res = expandSelectBoolean(Node: N); |
2067 | break; |
2068 | } |
2069 | |
2070 | if (Res) { |
2071 | LLVM_DEBUG(dbgs() << "SystemZ DAG preprocessing replacing:\nOld: " ); |
2072 | LLVM_DEBUG(N->dump(CurDAG)); |
2073 | LLVM_DEBUG(dbgs() << "\nNew: " ); |
2074 | LLVM_DEBUG(Res.getNode()->dump(CurDAG)); |
2075 | LLVM_DEBUG(dbgs() << "\n" ); |
2076 | |
2077 | CurDAG->ReplaceAllUsesOfValueWith(From: SDValue(N, 0), To: Res); |
2078 | MadeChange = true; |
2079 | } |
2080 | } |
2081 | |
2082 | if (MadeChange) |
2083 | CurDAG->RemoveDeadNodes(); |
2084 | } |
2085 | |