1 | //===-- SystemZElimCompare.cpp - Eliminate comparison instructions --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass: |
10 | // (1) tries to remove compares if CC already contains the required information |
11 | // (2) fuses compares and branches into COMPARE AND BRANCH instructions |
12 | // |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #include "SystemZ.h" |
16 | #include "SystemZInstrInfo.h" |
17 | #include "SystemZTargetMachine.h" |
18 | #include "llvm/ADT/SmallVector.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/ADT/StringRef.h" |
21 | #include "llvm/CodeGen/LiveRegUnits.h" |
22 | #include "llvm/CodeGen/MachineBasicBlock.h" |
23 | #include "llvm/CodeGen/MachineFunction.h" |
24 | #include "llvm/CodeGen/MachineFunctionPass.h" |
25 | #include "llvm/CodeGen/MachineInstr.h" |
26 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
27 | #include "llvm/CodeGen/MachineOperand.h" |
28 | #include "llvm/CodeGen/TargetRegisterInfo.h" |
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
30 | #include "llvm/MC/MCInstrDesc.h" |
31 | #include <cassert> |
32 | #include <cstdint> |
33 | |
34 | using namespace llvm; |
35 | |
36 | #define DEBUG_TYPE "systemz-elim-compare" |
37 | |
38 | STATISTIC(BranchOnCounts, "Number of branch-on-count instructions" ); |
39 | STATISTIC(LoadAndTraps, "Number of load-and-trap instructions" ); |
40 | STATISTIC(EliminatedComparisons, "Number of eliminated comparisons" ); |
41 | STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions" ); |
42 | |
43 | namespace { |
44 | |
// Summary of how a particular register is referenced by one or more
// instructions.
struct Reference {
  Reference() = default;

  // Merge the references recorded in Other into this summary and return
  // *this so that updates can be chained.
  Reference &operator|=(const Reference &Other) {
    Def = Def || Other.Def;
    Use = Use || Other.Use;
    return *this;
  }

  // A summary converts to true when the register is referenced at all,
  // whether as a definition or as a use.
  explicit operator bool() const { return Use || Def; }

  // Def is true if the register is defined in some form, Use is true if it is
  // used in some form; both cover direct references as well as references via
  // a sub- or super-register.
  bool Def = false;
  bool Use = false;
};
63 | |
64 | class SystemZElimCompare : public MachineFunctionPass { |
65 | public: |
66 | static char ID; |
67 | |
68 | SystemZElimCompare() : MachineFunctionPass(ID) { |
69 | initializeSystemZElimComparePass(*PassRegistry::getPassRegistry()); |
70 | } |
71 | |
72 | bool processBlock(MachineBasicBlock &MBB); |
73 | bool runOnMachineFunction(MachineFunction &F) override; |
74 | |
75 | MachineFunctionProperties getRequiredProperties() const override { |
76 | return MachineFunctionProperties().set( |
77 | MachineFunctionProperties::Property::NoVRegs); |
78 | } |
79 | |
80 | private: |
81 | Reference getRegReferences(MachineInstr &MI, unsigned Reg); |
82 | bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare, |
83 | SmallVectorImpl<MachineInstr *> &CCUsers); |
84 | bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare, |
85 | SmallVectorImpl<MachineInstr *> &CCUsers); |
86 | bool convertToLoadAndTest(MachineInstr &MI, MachineInstr &Compare, |
87 | SmallVectorImpl<MachineInstr *> &CCUsers); |
88 | bool convertToLogical(MachineInstr &MI, MachineInstr &Compare, |
89 | SmallVectorImpl<MachineInstr *> &CCUsers); |
90 | bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare, |
91 | SmallVectorImpl<MachineInstr *> &CCUsers, |
92 | unsigned ConvOpc = 0); |
93 | bool optimizeCompareZero(MachineInstr &Compare, |
94 | SmallVectorImpl<MachineInstr *> &CCUsers); |
95 | bool fuseCompareOperations(MachineInstr &Compare, |
96 | SmallVectorImpl<MachineInstr *> &CCUsers); |
97 | |
98 | const SystemZInstrInfo *TII = nullptr; |
99 | const TargetRegisterInfo *TRI = nullptr; |
100 | }; |
101 | |
102 | char SystemZElimCompare::ID = 0; |
103 | |
104 | } // end anonymous namespace |
105 | |
106 | INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE, |
107 | "SystemZ Comparison Elimination" , false, false) |
108 | |
109 | // Returns true if MI is an instruction whose output equals the value in Reg. |
110 | static bool preservesValueOf(MachineInstr &MI, unsigned Reg) { |
111 | switch (MI.getOpcode()) { |
112 | case SystemZ::LR: |
113 | case SystemZ::LGR: |
114 | case SystemZ::LGFR: |
115 | case SystemZ::LTR: |
116 | case SystemZ::LTGR: |
117 | case SystemZ::LTGFR: |
118 | if (MI.getOperand(i: 1).getReg() == Reg) |
119 | return true; |
120 | } |
121 | |
122 | return false; |
123 | } |
124 | |
125 | // Return true if any CC result of MI would (perhaps after conversion) |
126 | // reflect the value of Reg. |
127 | static bool resultTests(MachineInstr &MI, unsigned Reg) { |
128 | if (MI.getNumOperands() > 0 && MI.getOperand(i: 0).isReg() && |
129 | MI.getOperand(i: 0).isDef() && MI.getOperand(i: 0).getReg() == Reg) |
130 | return true; |
131 | |
132 | return (preservesValueOf(MI, Reg)); |
133 | } |
134 | |
135 | // Describe the references to Reg or any of its aliases in MI. |
136 | Reference SystemZElimCompare::getRegReferences(MachineInstr &MI, unsigned Reg) { |
137 | Reference Ref; |
138 | if (MI.isDebugInstr()) |
139 | return Ref; |
140 | |
141 | for (const MachineOperand &MO : MI.operands()) { |
142 | if (MO.isReg()) { |
143 | if (Register MOReg = MO.getReg()) { |
144 | if (TRI->regsOverlap(RegA: MOReg, RegB: Reg)) { |
145 | if (MO.isUse()) |
146 | Ref.Use = true; |
147 | else if (MO.isDef()) |
148 | Ref.Def = true; |
149 | } |
150 | } |
151 | } |
152 | } |
153 | return Ref; |
154 | } |
155 | |
156 | // Return true if this is a load and test which can be optimized the |
157 | // same way as compare instruction. |
158 | static bool isLoadAndTestAsCmp(MachineInstr &MI) { |
159 | // If we during isel used a load-and-test as a compare with 0, the |
160 | // def operand is dead. |
161 | return (MI.getOpcode() == SystemZ::LTEBR || |
162 | MI.getOpcode() == SystemZ::LTDBR || |
163 | MI.getOpcode() == SystemZ::LTXBR) && |
164 | MI.getOperand(i: 0).isDead(); |
165 | } |
166 | |
167 | // Return the source register of Compare, which is the unknown value |
168 | // being tested. |
169 | static unsigned getCompareSourceReg(MachineInstr &Compare) { |
170 | unsigned reg = 0; |
171 | if (Compare.isCompare()) |
172 | reg = Compare.getOperand(i: 0).getReg(); |
173 | else if (isLoadAndTestAsCmp(MI&: Compare)) |
174 | reg = Compare.getOperand(i: 1).getReg(); |
175 | assert(reg); |
176 | |
177 | return reg; |
178 | } |
179 | |
180 | // Compare compares the result of MI against zero. If MI is an addition |
181 | // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition |
182 | // and convert the branch to a BRCT(G) or BRCTH. Return true on success. |
183 | bool SystemZElimCompare::convertToBRCT( |
184 | MachineInstr &MI, MachineInstr &Compare, |
185 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
186 | // Check whether we have an addition of -1. |
187 | unsigned Opcode = MI.getOpcode(); |
188 | unsigned BRCT; |
189 | if (Opcode == SystemZ::AHI) |
190 | BRCT = SystemZ::BRCT; |
191 | else if (Opcode == SystemZ::AGHI) |
192 | BRCT = SystemZ::BRCTG; |
193 | else if (Opcode == SystemZ::AIH) |
194 | BRCT = SystemZ::BRCTH; |
195 | else |
196 | return false; |
197 | if (MI.getOperand(i: 2).getImm() != -1) |
198 | return false; |
199 | |
200 | // Check whether we have a single JLH. |
201 | if (CCUsers.size() != 1) |
202 | return false; |
203 | MachineInstr *Branch = CCUsers[0]; |
204 | if (Branch->getOpcode() != SystemZ::BRC || |
205 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
206 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_NE) |
207 | return false; |
208 | |
209 | // We already know that there are no references to the register between |
210 | // MI and Compare. Make sure that there are also no references between |
211 | // Compare and Branch. |
212 | unsigned SrcReg = getCompareSourceReg(Compare); |
213 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
214 | for (++MBBI; MBBI != MBBE; ++MBBI) |
215 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
216 | return false; |
217 | |
218 | // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH. |
219 | MachineOperand Target(Branch->getOperand(i: 2)); |
220 | while (Branch->getNumOperands()) |
221 | Branch->removeOperand(OpNo: 0); |
222 | Branch->setDesc(TII->get(BRCT)); |
223 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
224 | MIB.add(MO: MI.getOperand(i: 0)).add(MO: MI.getOperand(i: 1)).add(MO: Target); |
225 | // Add a CC def to BRCT(G), since we may have to split them again if the |
226 | // branch displacement overflows. BRCTH has a 32-bit displacement, so |
227 | // this is not necessary there. |
228 | if (BRCT != SystemZ::BRCTH) |
229 | MIB.addReg(SystemZ::RegNo: CC, flags: RegState::ImplicitDefine | RegState::Dead); |
230 | MI.eraseFromParent(); |
231 | return true; |
232 | } |
233 | |
234 | // Compare compares the result of MI against zero. If MI is a suitable load |
235 | // instruction and if CCUsers is a single conditional trap on zero, eliminate |
236 | // the load and convert the branch to a load-and-trap. Return true on success. |
237 | bool SystemZElimCompare::convertToLoadAndTrap( |
238 | MachineInstr &MI, MachineInstr &Compare, |
239 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
240 | unsigned LATOpcode = TII->getLoadAndTrap(Opcode: MI.getOpcode()); |
241 | if (!LATOpcode) |
242 | return false; |
243 | |
244 | // Check whether we have a single CondTrap that traps on zero. |
245 | if (CCUsers.size() != 1) |
246 | return false; |
247 | MachineInstr *Branch = CCUsers[0]; |
248 | if (Branch->getOpcode() != SystemZ::CondTrap || |
249 | Branch->getOperand(i: 0).getImm() != SystemZ::CCMASK_ICMP || |
250 | Branch->getOperand(i: 1).getImm() != SystemZ::CCMASK_CMP_EQ) |
251 | return false; |
252 | |
253 | // We already know that there are no references to the register between |
254 | // MI and Compare. Make sure that there are also no references between |
255 | // Compare and Branch. |
256 | unsigned SrcReg = getCompareSourceReg(Compare); |
257 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
258 | for (++MBBI; MBBI != MBBE; ++MBBI) |
259 | if (getRegReferences(MI&: *MBBI, Reg: SrcReg)) |
260 | return false; |
261 | |
262 | // The transformation is OK. Rebuild Branch as a load-and-trap. |
263 | while (Branch->getNumOperands()) |
264 | Branch->removeOperand(OpNo: 0); |
265 | Branch->setDesc(TII->get(LATOpcode)); |
266 | MachineInstrBuilder(*Branch->getParent()->getParent(), Branch) |
267 | .add(MO: MI.getOperand(i: 0)) |
268 | .add(MO: MI.getOperand(i: 1)) |
269 | .add(MO: MI.getOperand(i: 2)) |
270 | .add(MO: MI.getOperand(i: 3)); |
271 | MI.eraseFromParent(); |
272 | return true; |
273 | } |
274 | |
275 | // If MI is a load instruction, try to convert it into a LOAD AND TEST. |
276 | // Return true on success. |
277 | bool SystemZElimCompare::convertToLoadAndTest( |
278 | MachineInstr &MI, MachineInstr &Compare, |
279 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
280 | |
281 | // Try to adjust CC masks for the LOAD AND TEST opcode that could replace MI. |
282 | unsigned Opcode = TII->getLoadAndTest(Opcode: MI.getOpcode()); |
283 | if (!Opcode || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc: Opcode)) |
284 | return false; |
285 | |
286 | // Rebuild to get the CC operand in the right place. |
287 | auto MIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode)); |
288 | for (const auto &MO : MI.operands()) |
289 | MIB.add(MO); |
290 | MIB.setMemRefs(MI.memoperands()); |
291 | MI.eraseFromParent(); |
292 | |
293 | // Mark instruction as not raising an FP exception if applicable. We already |
294 | // verified earlier that this move is valid. |
295 | if (!Compare.mayRaiseFPException()) |
296 | MIB.setMIFlag(MachineInstr::MIFlag::NoFPExcept); |
297 | |
298 | return true; |
299 | } |
300 | |
301 | // See if MI is an instruction with an equivalent "logical" opcode that can |
302 | // be used and replace MI. This is useful for EQ/NE comparisons where the |
303 | // "nsw" flag is missing since the "logical" opcode always sets CC to reflect |
304 | // the result being zero or non-zero. |
305 | bool SystemZElimCompare::convertToLogical( |
306 | MachineInstr &MI, MachineInstr &Compare, |
307 | SmallVectorImpl<MachineInstr *> &CCUsers) { |
308 | |
309 | unsigned ConvOpc = 0; |
310 | switch (MI.getOpcode()) { |
311 | case SystemZ::AR: ConvOpc = SystemZ::ALR; break; |
312 | case SystemZ::ARK: ConvOpc = SystemZ::ALRK; break; |
313 | case SystemZ::AGR: ConvOpc = SystemZ::ALGR; break; |
314 | case SystemZ::AGRK: ConvOpc = SystemZ::ALGRK; break; |
315 | case SystemZ::A: ConvOpc = SystemZ::AL; break; |
316 | case SystemZ::AY: ConvOpc = SystemZ::ALY; break; |
317 | case SystemZ::AG: ConvOpc = SystemZ::ALG; break; |
318 | default: break; |
319 | } |
320 | if (!ConvOpc || !adjustCCMasksForInstr(MI, Compare, CCUsers, ConvOpc)) |
321 | return false; |
322 | |
323 | // Operands should be identical, so just change the opcode and remove the |
324 | // dead flag on CC. |
325 | MI.setDesc(TII->get(ConvOpc)); |
326 | MI.clearRegisterDeads(SystemZ::Reg: CC); |
327 | return true; |
328 | } |
329 | |
330 | #ifndef NDEBUG |
331 | static bool isAddWithImmediate(unsigned Opcode) { |
332 | switch(Opcode) { |
333 | case SystemZ::AHI: |
334 | case SystemZ::AHIK: |
335 | case SystemZ::AGHI: |
336 | case SystemZ::AGHIK: |
337 | case SystemZ::AFI: |
338 | case SystemZ::AIH: |
339 | case SystemZ::AGFI: |
340 | return true; |
341 | default: break; |
342 | } |
343 | return false; |
344 | } |
345 | #endif |
346 | |
347 | // The CC users in CCUsers are testing the result of a comparison of some |
348 | // value X against zero and we know that any CC value produced by MI would |
349 | // also reflect the value of X. ConvOpc may be used to pass the transfomed |
350 | // opcode MI will have if this succeeds. Try to adjust CCUsers so that they |
351 | // test the result of MI directly, returning true on success. Leave |
352 | // everything unchanged on failure. |
353 | bool SystemZElimCompare::adjustCCMasksForInstr( |
354 | MachineInstr &MI, MachineInstr &Compare, |
355 | SmallVectorImpl<MachineInstr *> &CCUsers, |
356 | unsigned ConvOpc) { |
357 | unsigned CompareFlags = Compare.getDesc().TSFlags; |
358 | unsigned CompareCCValues = SystemZII::getCCValues(Flags: CompareFlags); |
359 | int Opcode = (ConvOpc ? ConvOpc : MI.getOpcode()); |
360 | const MCInstrDesc &Desc = TII->get(Opcode); |
361 | unsigned MIFlags = Desc.TSFlags; |
362 | |
363 | // If Compare may raise an FP exception, we can only eliminate it |
364 | // if MI itself would have already raised the exception. |
365 | if (Compare.mayRaiseFPException()) { |
366 | // If the caller will change MI to use ConvOpc, only test whether |
367 | // ConvOpc is suitable; it is on the caller to set the MI flag. |
368 | if (ConvOpc && !Desc.mayRaiseFPException()) |
369 | return false; |
370 | // If the caller will not change MI, we test the MI flag here. |
371 | if (!ConvOpc && !MI.mayRaiseFPException()) |
372 | return false; |
373 | } |
374 | |
375 | // See which compare-style condition codes are available. |
376 | unsigned CCValues = SystemZII::getCCValues(Flags: MIFlags); |
377 | unsigned ReusableCCMask = CCValues; |
378 | // For unsigned comparisons with zero, only equality makes sense. |
379 | if (CompareFlags & SystemZII::IsLogical) |
380 | ReusableCCMask &= SystemZ::CCMASK_CMP_EQ; |
381 | unsigned OFImplies = 0; |
382 | bool LogicalMI = false; |
383 | bool MIEquivalentToCmp = false; |
384 | if (MI.getFlag(Flag: MachineInstr::NoSWrap) && |
385 | (MIFlags & SystemZII::CCIfNoSignedWrap)) { |
386 | // If MI has the NSW flag set in combination with the |
387 | // SystemZII::CCIfNoSignedWrap flag, all CCValues are valid. |
388 | } |
389 | else if ((MIFlags & SystemZII::CCIfNoSignedWrap) && |
390 | MI.getOperand(i: 2).isImm()) { |
391 | // Signed addition of immediate. If adding a positive immediate |
392 | // overflows, the result must be less than zero. If adding a negative |
393 | // immediate overflows, the result must be larger than zero (except in |
394 | // the special case of adding the minimum value of the result range, in |
395 | // which case we cannot predict whether the result is larger than or |
396 | // equal to zero). |
397 | assert(isAddWithImmediate(Opcode) && "Expected an add with immediate." ); |
398 | assert(!MI.mayLoadOrStore() && "Expected an immediate term." ); |
399 | int64_t RHS = MI.getOperand(i: 2).getImm(); |
400 | if (SystemZ::GRX32BitRegClass.contains(MI.getOperand(0).getReg()) && |
401 | RHS == INT32_MIN) |
402 | return false; |
403 | OFImplies = (RHS > 0 ? SystemZ::CCMASK_CMP_LT : SystemZ::CCMASK_CMP_GT); |
404 | } |
405 | else if ((MIFlags & SystemZII::IsLogical) && CCValues) { |
406 | // Use CCMASK_CMP_EQ to match with CCUsers. On success CCMask:s will be |
407 | // converted to CCMASK_LOGICAL_ZERO or CCMASK_LOGICAL_NONZERO. |
408 | LogicalMI = true; |
409 | ReusableCCMask = SystemZ::CCMASK_CMP_EQ; |
410 | } |
411 | else { |
412 | ReusableCCMask &= SystemZII::getCompareZeroCCMask(Flags: MIFlags); |
413 | assert((ReusableCCMask & ~CCValues) == 0 && "Invalid CCValues" ); |
414 | MIEquivalentToCmp = |
415 | ReusableCCMask == CCValues && CCValues == CompareCCValues; |
416 | } |
417 | if (ReusableCCMask == 0) |
418 | return false; |
419 | |
420 | if (!MIEquivalentToCmp) { |
421 | // Now check whether these flags are enough for all users. |
422 | SmallVector<MachineOperand *, 4> AlterMasks; |
423 | for (unsigned int I = 0, E = CCUsers.size(); I != E; ++I) { |
424 | MachineInstr *CCUserMI = CCUsers[I]; |
425 | |
426 | // Fail if this isn't a use of CC that we understand. |
427 | unsigned Flags = CCUserMI->getDesc().TSFlags; |
428 | unsigned FirstOpNum; |
429 | if (Flags & SystemZII::CCMaskFirst) |
430 | FirstOpNum = 0; |
431 | else if (Flags & SystemZII::CCMaskLast) |
432 | FirstOpNum = CCUserMI->getNumExplicitOperands() - 2; |
433 | else |
434 | return false; |
435 | |
436 | // Check whether the instruction predicate treats all CC values |
437 | // outside of ReusableCCMask in the same way. In that case it |
438 | // doesn't matter what those CC values mean. |
439 | unsigned CCValid = CCUserMI->getOperand(i: FirstOpNum).getImm(); |
440 | unsigned CCMask = CCUserMI->getOperand(i: FirstOpNum + 1).getImm(); |
441 | assert(CCValid == CompareCCValues && (CCMask & ~CCValid) == 0 && |
442 | "Corrupt CC operands of CCUser." ); |
443 | unsigned OutValid = ~ReusableCCMask & CCValid; |
444 | unsigned OutMask = ~ReusableCCMask & CCMask; |
445 | if (OutMask != 0 && OutMask != OutValid) |
446 | return false; |
447 | |
448 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum)); |
449 | AlterMasks.push_back(Elt: &CCUserMI->getOperand(i: FirstOpNum + 1)); |
450 | } |
451 | |
452 | // All users are OK. Adjust the masks for MI. |
453 | for (unsigned I = 0, E = AlterMasks.size(); I != E; I += 2) { |
454 | AlterMasks[I]->setImm(CCValues); |
455 | unsigned CCMask = AlterMasks[I + 1]->getImm(); |
456 | if (LogicalMI) { |
457 | // Translate the CCMask into its "logical" value. |
458 | CCMask = (CCMask == SystemZ::CCMASK_CMP_EQ ? |
459 | SystemZ::CCMASK_LOGICAL_ZERO : SystemZ::CCMASK_LOGICAL_NONZERO); |
460 | CCMask &= CCValues; // Logical subtracts never set CC=0. |
461 | } else { |
462 | if (CCMask & ~ReusableCCMask) |
463 | CCMask = (CCMask & ReusableCCMask) | (CCValues & ~ReusableCCMask); |
464 | CCMask |= (CCMask & OFImplies) ? SystemZ::CCMASK_ARITH_OVERFLOW : 0; |
465 | } |
466 | AlterMasks[I + 1]->setImm(CCMask); |
467 | } |
468 | } |
469 | |
470 | // CC is now live after MI. |
471 | if (!ConvOpc) |
472 | MI.clearRegisterDeads(SystemZ::CC); |
473 | |
474 | // Check if MI lies before Compare. |
475 | bool BeforeCmp = false; |
476 | MachineBasicBlock::iterator MBBI = MI, MBBE = MI.getParent()->end(); |
477 | for (++MBBI; MBBI != MBBE; ++MBBI) |
478 | if (MBBI == Compare) { |
479 | BeforeCmp = true; |
480 | break; |
481 | } |
482 | |
483 | // Clear any intervening kills of CC. |
484 | if (BeforeCmp) { |
485 | MachineBasicBlock::iterator MBBI = MI, MBBE = Compare; |
486 | for (++MBBI; MBBI != MBBE; ++MBBI) |
487 | MBBI->clearRegisterKills(SystemZ::CC, TRI); |
488 | } |
489 | |
490 | return true; |
491 | } |
492 | |
493 | // Return true if Compare is a comparison against zero. |
494 | static bool isCompareZero(MachineInstr &Compare) { |
495 | if (isLoadAndTestAsCmp(MI&: Compare)) |
496 | return true; |
497 | return Compare.getNumExplicitOperands() == 2 && |
498 | Compare.getOperand(i: 1).isImm() && Compare.getOperand(i: 1).getImm() == 0; |
499 | } |
500 | |
501 | // Try to optimize cases where comparison instruction Compare is testing |
502 | // a value against zero. Return true on success and if Compare should be |
503 | // deleted as dead. CCUsers is the list of instructions that use the CC |
504 | // value produced by Compare. |
505 | bool SystemZElimCompare::optimizeCompareZero( |
506 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
507 | if (!isCompareZero(Compare)) |
508 | return false; |
509 | |
510 | // Search back for CC results that are based on the first operand. |
511 | unsigned SrcReg = getCompareSourceReg(Compare); |
512 | MachineBasicBlock &MBB = *Compare.getParent(); |
513 | Reference CCRefs; |
514 | Reference SrcRefs; |
515 | for (MachineBasicBlock::reverse_iterator MBBI = |
516 | std::next(x: MachineBasicBlock::reverse_iterator(&Compare)), |
517 | MBBE = MBB.rend(); MBBI != MBBE;) { |
518 | MachineInstr &MI = *MBBI++; |
519 | if (resultTests(MI, Reg: SrcReg)) { |
520 | // Try to remove both MI and Compare by converting a branch to BRCT(G). |
521 | // or a load-and-trap instruction. We don't care in this case whether |
522 | // CC is modified between MI and Compare. |
523 | if (!CCRefs.Use && !SrcRefs) { |
524 | if (convertToBRCT(MI, Compare, CCUsers)) { |
525 | BranchOnCounts += 1; |
526 | return true; |
527 | } |
528 | if (convertToLoadAndTrap(MI, Compare, CCUsers)) { |
529 | LoadAndTraps += 1; |
530 | return true; |
531 | } |
532 | } |
533 | // Try to eliminate Compare by reusing a CC result from MI. |
534 | if ((!CCRefs && convertToLoadAndTest(MI, Compare, CCUsers)) || |
535 | (!CCRefs.Def && |
536 | (adjustCCMasksForInstr(MI, Compare, CCUsers) || |
537 | convertToLogical(MI, Compare, CCUsers)))) { |
538 | EliminatedComparisons += 1; |
539 | return true; |
540 | } |
541 | } |
542 | SrcRefs |= getRegReferences(MI, Reg: SrcReg); |
543 | if (SrcRefs.Def) |
544 | break; |
545 | CCRefs |= getRegReferences(MI, SystemZ::CC); |
546 | if (CCRefs.Use && CCRefs.Def) |
547 | break; |
548 | // Eliminating a Compare that may raise an FP exception will move |
549 | // raising the exception to some earlier MI. We cannot do this if |
550 | // there is anything in between that might change exception flags. |
551 | if (Compare.mayRaiseFPException() && |
552 | (MI.isCall() || MI.hasUnmodeledSideEffects())) |
553 | break; |
554 | } |
555 | |
556 | // Also do a forward search to handle cases where an instruction after the |
557 | // compare can be converted, like |
558 | // CGHI %r0d, 0; %r1d = LGR %r0d => LTGR %r1d, %r0d |
559 | auto MIRange = llvm::make_range( |
560 | x: std::next(x: MachineBasicBlock::iterator(&Compare)), y: MBB.end()); |
561 | for (MachineInstr &MI : llvm::make_early_inc_range(Range&: MIRange)) { |
562 | if (preservesValueOf(MI, Reg: SrcReg)) { |
563 | // Try to eliminate Compare by reusing a CC result from MI. |
564 | if (convertToLoadAndTest(MI, Compare, CCUsers)) { |
565 | EliminatedComparisons += 1; |
566 | return true; |
567 | } |
568 | } |
569 | if (getRegReferences(MI, Reg: SrcReg).Def) |
570 | return false; |
571 | if (getRegReferences(MI, SystemZ::CC)) |
572 | return false; |
573 | } |
574 | |
575 | return false; |
576 | } |
577 | |
578 | // Try to fuse comparison instruction Compare into a later branch. |
579 | // Return true on success and if Compare is therefore redundant. |
580 | bool SystemZElimCompare::fuseCompareOperations( |
581 | MachineInstr &Compare, SmallVectorImpl<MachineInstr *> &CCUsers) { |
582 | // See whether we have a single branch with which to fuse. |
583 | if (CCUsers.size() != 1) |
584 | return false; |
585 | MachineInstr *Branch = CCUsers[0]; |
586 | SystemZII::FusedCompareType Type; |
587 | switch (Branch->getOpcode()) { |
588 | case SystemZ::BRC: |
589 | Type = SystemZII::CompareAndBranch; |
590 | break; |
591 | case SystemZ::CondReturn: |
592 | Type = SystemZII::CompareAndReturn; |
593 | break; |
594 | case SystemZ::CallBCR: |
595 | Type = SystemZII::CompareAndSibcall; |
596 | break; |
597 | case SystemZ::CondTrap: |
598 | Type = SystemZII::CompareAndTrap; |
599 | break; |
600 | default: |
601 | return false; |
602 | } |
603 | |
604 | // See whether we have a comparison that can be fused. |
605 | unsigned FusedOpcode = |
606 | TII->getFusedCompare(Opcode: Compare.getOpcode(), Type, MI: &Compare); |
607 | if (!FusedOpcode) |
608 | return false; |
609 | |
610 | // Make sure that the operands are available at the branch. |
611 | // SrcReg2 is the register if the source operand is a register, |
612 | // 0 if the source operand is immediate, and the base register |
613 | // if the source operand is memory (index is not supported). |
614 | Register SrcReg = Compare.getOperand(i: 0).getReg(); |
615 | Register SrcReg2 = |
616 | Compare.getOperand(i: 1).isReg() ? Compare.getOperand(i: 1).getReg() : Register(); |
617 | MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch; |
618 | for (++MBBI; MBBI != MBBE; ++MBBI) |
619 | if (MBBI->modifiesRegister(Reg: SrcReg, TRI) || |
620 | (SrcReg2 && MBBI->modifiesRegister(Reg: SrcReg2, TRI))) |
621 | return false; |
622 | |
623 | // Read the branch mask, target (if applicable), regmask (if applicable). |
624 | MachineOperand CCMask(MBBI->getOperand(i: 1)); |
625 | assert((CCMask.getImm() & ~SystemZ::CCMASK_ICMP) == 0 && |
626 | "Invalid condition-code mask for integer comparison" ); |
627 | // This is only valid for CompareAndBranch and CompareAndSibcall. |
628 | MachineOperand Target(MBBI->getOperand( |
629 | i: (Type == SystemZII::CompareAndBranch || |
630 | Type == SystemZII::CompareAndSibcall) ? 2 : 0)); |
631 | const uint32_t *RegMask; |
632 | if (Type == SystemZII::CompareAndSibcall) |
633 | RegMask = MBBI->getOperand(i: 3).getRegMask(); |
634 | |
635 | // Clear out all current operands. |
636 | int CCUse = MBBI->findRegisterUseOperandIdx(SystemZ::CC, TRI, false); |
637 | assert(CCUse >= 0 && "BRC/BCR must use CC" ); |
638 | Branch->removeOperand(OpNo: CCUse); |
639 | // Remove regmask (sibcall). |
640 | if (Type == SystemZII::CompareAndSibcall) |
641 | Branch->removeOperand(OpNo: 3); |
642 | // Remove target (branch or sibcall). |
643 | if (Type == SystemZII::CompareAndBranch || |
644 | Type == SystemZII::CompareAndSibcall) |
645 | Branch->removeOperand(OpNo: 2); |
646 | Branch->removeOperand(OpNo: 1); |
647 | Branch->removeOperand(OpNo: 0); |
648 | |
649 | // Rebuild Branch as a fused compare and branch. |
650 | // SrcNOps is the number of MI operands of the compare instruction |
651 | // that we need to copy over. |
652 | unsigned SrcNOps = 2; |
653 | if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT) |
654 | SrcNOps = 3; |
655 | Branch->setDesc(TII->get(FusedOpcode)); |
656 | MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch); |
657 | for (unsigned I = 0; I < SrcNOps; I++) |
658 | MIB.add(MO: Compare.getOperand(i: I)); |
659 | MIB.add(MO: CCMask); |
660 | |
661 | if (Type == SystemZII::CompareAndBranch) { |
662 | // Only conditional branches define CC, as they may be converted back |
663 | // to a non-fused branch because of a long displacement. Conditional |
664 | // returns don't have that problem. |
665 | MIB.add(Target).addReg(SystemZ::CC, |
666 | RegState::ImplicitDefine | RegState::Dead); |
667 | } |
668 | |
669 | if (Type == SystemZII::CompareAndSibcall) { |
670 | MIB.add(MO: Target); |
671 | MIB.addRegMask(Mask: RegMask); |
672 | } |
673 | |
674 | // Clear any intervening kills of SrcReg and SrcReg2. |
675 | MBBI = Compare; |
676 | for (++MBBI; MBBI != MBBE; ++MBBI) { |
677 | MBBI->clearRegisterKills(Reg: SrcReg, RegInfo: TRI); |
678 | if (SrcReg2) |
679 | MBBI->clearRegisterKills(Reg: SrcReg2, RegInfo: TRI); |
680 | } |
681 | FusedComparisons += 1; |
682 | return true; |
683 | } |
684 | |
685 | // Process all comparison instructions in MBB. Return true if something |
686 | // changed. |
687 | bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) { |
688 | bool Changed = false; |
689 | |
690 | // Walk backwards through the block looking for comparisons, recording |
691 | // all CC users as we go. The subroutines can delete Compare and |
692 | // instructions before it. |
693 | LiveRegUnits LiveRegs(*TRI); |
694 | LiveRegs.addLiveOuts(MBB); |
695 | bool CompleteCCUsers = LiveRegs.available(SystemZ::CC); |
696 | SmallVector<MachineInstr *, 4> CCUsers; |
697 | MachineBasicBlock::iterator MBBI = MBB.end(); |
698 | while (MBBI != MBB.begin()) { |
699 | MachineInstr &MI = *--MBBI; |
700 | if (CompleteCCUsers && (MI.isCompare() || isLoadAndTestAsCmp(MI)) && |
701 | (optimizeCompareZero(Compare&: MI, CCUsers) || |
702 | fuseCompareOperations(Compare&: MI, CCUsers))) { |
703 | ++MBBI; |
704 | MI.eraseFromParent(); |
705 | Changed = true; |
706 | CCUsers.clear(); |
707 | continue; |
708 | } |
709 | |
710 | if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) { |
711 | CCUsers.clear(); |
712 | CompleteCCUsers = true; |
713 | } |
714 | if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr) && CompleteCCUsers) |
715 | CCUsers.push_back(Elt: &MI); |
716 | } |
717 | return Changed; |
718 | } |
719 | |
720 | bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) { |
721 | if (skipFunction(F: F.getFunction())) |
722 | return false; |
723 | |
724 | TII = F.getSubtarget<SystemZSubtarget>().getInstrInfo(); |
725 | TRI = &TII->getRegisterInfo(); |
726 | |
727 | bool Changed = false; |
728 | for (auto &MBB : F) |
729 | Changed |= processBlock(MBB); |
730 | |
731 | return Changed; |
732 | } |
733 | |
734 | FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) { |
735 | return new SystemZElimCompare(); |
736 | } |
737 | |