1 | //===---- X86FixupSetCC.cpp - fix zero-extension of setcc patterns --------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines a pass that fixes zero-extension of setcc patterns. |
10 | // X86 setcc instructions are modeled to have no input arguments, and a single |
11 | // GR8 output argument. This is consistent with other similar instructions |
12 | // (e.g. movb), but means it is impossible to directly generate a setcc into |
13 | // the lower GR8 of a specified GR32. |
14 | // This means that ISel must select (zext (setcc)) into something like |
15 | // seta %al; movzbl %al, %eax. |
16 | // Unfortunately, this can cause a stall due to the partial register write |
17 | // performed by the setcc. Instead, we can use: |
18 | // xor %eax, %eax; seta %al |
19 | // This both avoids the stall, and encodes shorter. |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #include "X86.h" |
23 | #include "X86InstrInfo.h" |
24 | #include "X86Subtarget.h" |
25 | #include "llvm/ADT/Statistic.h" |
26 | #include "llvm/CodeGen/MachineFunctionPass.h" |
27 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
28 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
29 | |
30 | using namespace llvm; |
31 | |
32 | #define DEBUG_TYPE "x86-fixup-setcc" |
33 | |
34 | STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted" ); |
35 | |
36 | namespace { |
37 | class X86FixupSetCCPass : public MachineFunctionPass { |
38 | public: |
39 | static char ID; |
40 | |
41 | X86FixupSetCCPass() : MachineFunctionPass(ID) {} |
42 | |
43 | StringRef getPassName() const override { return "X86 Fixup SetCC" ; } |
44 | |
45 | bool runOnMachineFunction(MachineFunction &MF) override; |
46 | |
47 | private: |
48 | MachineRegisterInfo *MRI = nullptr; |
49 | const X86InstrInfo *TII = nullptr; |
50 | |
51 | enum { SearchBound = 16 }; |
52 | }; |
53 | } // end anonymous namespace |
54 | |
55 | char X86FixupSetCCPass::ID = 0; |
56 | |
57 | INITIALIZE_PASS(X86FixupSetCCPass, DEBUG_TYPE, DEBUG_TYPE, false, false) |
58 | |
59 | FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } |
60 | |
61 | bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { |
62 | bool Changed = false; |
63 | MRI = &MF.getRegInfo(); |
64 | TII = MF.getSubtarget<X86Subtarget>().getInstrInfo(); |
65 | |
66 | SmallVector<MachineInstr*, 4> ToErase; |
67 | |
68 | for (auto &MBB : MF) { |
69 | MachineInstr *FlagsDefMI = nullptr; |
70 | for (auto &MI : MBB) { |
71 | // Remember the most recent preceding eflags defining instruction. |
72 | if (MI.definesRegister(X86::Reg: EFLAGS, /*TRI=*/nullptr)) |
73 | FlagsDefMI = &MI; |
74 | |
75 | // Find a setcc that is used by a zext. |
76 | // This doesn't have to be the only use, the transformation is safe |
77 | // regardless. |
78 | if (MI.getOpcode() != X86::SETCCr) |
79 | continue; |
80 | |
81 | MachineInstr *ZExt = nullptr; |
82 | for (auto &Use : MRI->use_instructions(Reg: MI.getOperand(i: 0).getReg())) |
83 | if (Use.getOpcode() == X86::MOVZX32rr8) |
84 | ZExt = &Use; |
85 | |
86 | if (!ZExt) |
87 | continue; |
88 | |
89 | if (!FlagsDefMI) |
90 | continue; |
91 | |
92 | // We'd like to put something that clobbers eflags directly before |
93 | // FlagsDefMI. This can't hurt anything after FlagsDefMI, because |
94 | // it, itself, by definition, clobbers eflags. But it may happen that |
95 | // FlagsDefMI also *uses* eflags, in which case the transformation is |
96 | // invalid. |
97 | if (FlagsDefMI->readsRegister(X86::Reg: EFLAGS, /*TRI=*/nullptr)) |
98 | continue; |
99 | |
100 | // On 32-bit, we need to be careful to force an ABCD register. |
101 | const TargetRegisterClass *RC = MF.getSubtarget<X86Subtarget>().is64Bit() |
102 | ? &X86::GR32RegClass |
103 | : &X86::GR32_ABCDRegClass; |
104 | if (!MRI->constrainRegClass(Reg: ZExt->getOperand(i: 0).getReg(), RC)) { |
105 | // If we cannot constrain the register, we would need an additional copy |
106 | // and are better off keeping the MOVZX32rr8 we have now. |
107 | continue; |
108 | } |
109 | |
110 | ++NumSubstZexts; |
111 | Changed = true; |
112 | |
113 | // Initialize a register with 0. This must go before the eflags def |
114 | Register ZeroReg = MRI->createVirtualRegister(RegClass: RC); |
115 | BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), |
116 | ZeroReg); |
117 | |
118 | // X86 setcc only takes an output GR8, so fake a GR32 input by inserting |
119 | // the setcc result into the low byte of the zeroed register. |
120 | BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), |
121 | TII->get(X86::INSERT_SUBREG), ZExt->getOperand(i: 0).getReg()) |
122 | .addReg(ZeroReg) |
123 | .addReg(MI.getOperand(i: 0).getReg()) |
124 | .addImm(X86::sub_8bit); |
125 | ToErase.push_back(Elt: ZExt); |
126 | } |
127 | } |
128 | |
129 | for (auto &I : ToErase) |
130 | I->eraseFromParent(); |
131 | |
132 | return Changed; |
133 | } |
134 | |