1 | //===- bolt/Passes/CMOVConversion.cpp ------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the CMOV conversion pass. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/CMOVConversion.h" |
14 | #include "bolt/Core/BinaryBasicBlock.h" |
15 | #include "bolt/Core/BinaryContext.h" |
16 | #include "bolt/Utils/CommandLineOpts.h" |
17 | #include "llvm/ADT/PostOrderIterator.h" |
18 | #include "llvm/Support/CommandLine.h" |
19 | #include "llvm/Support/ErrorHandling.h" |
20 | |
21 | #define DEBUG_TYPE "cmov" |
22 | |
23 | using namespace llvm; |
24 | |
25 | namespace opts { |
26 | |
27 | extern cl::OptionCategory BoltOptCategory; |
28 | |
29 | static cl::opt<int> BiasThreshold( |
30 | "cmov-conversion-bias-threshold" , |
31 | cl::desc("minimum condition bias (pct) to perform a CMOV conversion, " |
32 | "-1 to not account bias" ), |
33 | cl::ReallyHidden, cl::init(Val: 1), cl::cat(BoltOptCategory)); |
34 | |
35 | static cl::opt<int> MispredictionThreshold( |
36 | "cmov-conversion-misprediction-threshold" , |
37 | cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, " |
38 | "-1 to not account misprediction rate" ), |
39 | cl::ReallyHidden, cl::init(Val: 5), cl::cat(BoltOptCategory)); |
40 | |
41 | static cl::opt<bool> ConvertStackMemOperand( |
42 | "cmov-conversion-convert-stack-mem-operand" , |
43 | cl::desc("convert moves with stack memory operand (potentially unsafe)" ), |
44 | cl::ReallyHidden, cl::init(Val: false), cl::cat(BoltOptCategory)); |
45 | |
46 | static cl::opt<bool> ConvertBasePtrStackMemOperand( |
47 | "cmov-conversion-convert-rbp-stack-mem-operand" , |
48 | cl::desc("convert moves with rbp stack memory operand (unsafe, must be off " |
49 | "for binaries compiled with -fomit-frame-pointer)" ), |
50 | cl::ReallyHidden, cl::init(Val: false), cl::cat(BoltOptCategory)); |
51 | |
52 | } // namespace opts |
53 | |
54 | namespace llvm { |
55 | namespace bolt { |
56 | |
57 | // Return true if the CFG conforms to the following subgraph: |
58 | // Predecessor |
59 | // / \ |
60 | // | RHS |
61 | // \ / |
62 | // LHS |
63 | // Caller guarantees that LHS and RHS share the same predecessor. |
64 | bool isIfThenSubgraph(const BinaryBasicBlock &LHS, |
65 | const BinaryBasicBlock &RHS) { |
66 | if (LHS.pred_size() != 2 || RHS.pred_size() != 1) |
67 | return false; |
68 | |
69 | // Sanity check |
70 | BinaryBasicBlock *Predecessor = *RHS.pred_begin(); |
71 | assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph" ); |
72 | (void)Predecessor; |
73 | |
74 | if (!LHS.isPredecessor(BB: &RHS)) |
75 | return false; |
76 | if (RHS.succ_size() != 1) |
77 | return false; |
78 | return true; |
79 | } |
80 | |
81 | bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc, |
82 | BinaryBasicBlock *&UnconditionalSucc, |
83 | bool &IsConditionalTaken) { |
84 | BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(Condition: true); |
85 | BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(Condition: false); |
86 | bool IsIfThenTaken = isIfThenSubgraph(LHS: *FallthroughSucc, RHS: *TakenSucc); |
87 | bool IsIfThenFallthrough = isIfThenSubgraph(LHS: *TakenSucc, RHS: *FallthroughSucc); |
88 | if (!IsIfThenFallthrough && !IsIfThenTaken) |
89 | return false; |
90 | assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph" ); |
91 | |
92 | // Output parameters |
93 | ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc; |
94 | UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc; |
95 | IsConditionalTaken = IsIfThenTaken; |
96 | return true; |
97 | } |
98 | |
99 | // Return true if basic block instructions can be converted into cmov(s). |
100 | bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB, |
101 | unsigned CC) { |
102 | if (BB.empty()) |
103 | return false; |
104 | const MCInst *LastInst = BB.getLastNonPseudoInstr(); |
105 | // Only pseudo instructions, can't be converted into CMOV |
106 | if (LastInst == nullptr) |
107 | return false; |
108 | for (const MCInst &Inst : BB) { |
109 | if (BC.MIB->isPseudo(Inst)) |
110 | continue; |
111 | // Unconditional branch as a last instruction is OK |
112 | if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst)) |
113 | continue; |
114 | MCInst Cmov(Inst); |
115 | // GPR move is OK |
116 | if (!BC.MIB->convertMoveToConditionalMove( |
117 | Inst&: Cmov, CC, AllowStackMemOp: opts::ConvertStackMemOperand, |
118 | AllowBasePtrStackMemOp: opts::ConvertBasePtrStackMemOperand)) { |
119 | LLVM_DEBUG({ |
120 | dbgs() << BB.getName() << ": can't convert instruction " ; |
121 | BC.printInstruction(dbgs(), Cmov); |
122 | }); |
123 | return false; |
124 | } |
125 | } |
126 | return true; |
127 | } |
128 | |
129 | void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) { |
130 | for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) { |
131 | if (BC.MIB->isPseudo(Inst: *II)) |
132 | continue; |
133 | if (BC.MIB->isUnconditionalBranch(Inst: *II)) { |
134 | // XXX: this invalidates II but we return immediately |
135 | BB.eraseInstruction(II); |
136 | return; |
137 | } |
138 | bool Result = BC.MIB->convertMoveToConditionalMove( |
139 | Inst&: *II, CC, AllowStackMemOp: opts::ConvertStackMemOperand, |
140 | AllowBasePtrStackMemOp: opts::ConvertBasePtrStackMemOperand); |
141 | assert(Result && "unexpected instruction" ); |
142 | (void)Result; |
143 | } |
144 | } |
145 | |
146 | // Returns misprediction rate if the profile data is available, -1 otherwise. |
147 | std::pair<int, uint64_t> |
148 | calculateMispredictionRate(const BinaryBasicBlock &BB) { |
149 | uint64_t TotalExecCount = 0; |
150 | uint64_t TotalMispredictionCount = 0; |
151 | for (auto BI : BB.branch_info()) { |
152 | TotalExecCount += BI.Count; |
153 | if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) |
154 | TotalMispredictionCount += BI.MispredictedCount; |
155 | } |
156 | if (!TotalExecCount) |
157 | return {-1, TotalMispredictionCount}; |
158 | return {100.0f * TotalMispredictionCount / TotalExecCount, |
159 | TotalMispredictionCount}; |
160 | } |
161 | |
162 | // Returns conditional succ bias if the profile is available, -1 otherwise. |
163 | int calculateConditionBias(const BinaryBasicBlock &BB, |
164 | const BinaryBasicBlock &ConditionalSucc) { |
165 | if (auto BranchStats = BB.getBranchStats(Succ: &ConditionalSucc)) |
166 | return BranchStats->first; |
167 | return -1; |
168 | } |
169 | |
170 | void CMOVConversion::Stats::dumpTo(raw_ostream &OS) { |
171 | OS << "converted static " << StaticPerformed << "/" << StaticPossible |
172 | << formatv(Fmt: " ({0:P}) " , Vals: getStaticRatio()) |
173 | << "hammock(s) into CMOV sequences, with dynamic execution count " |
174 | << DynamicPerformed << "/" << DynamicPossible |
175 | << formatv(Fmt: " ({0:P}), " , Vals: getDynamicRatio()) << "saving " << RemovedMP |
176 | << "/" << PossibleMP << formatv(Fmt: " ({0:P}) " , Vals: getMPRatio()) |
177 | << "mispredictions\n" ; |
178 | } |
179 | |
180 | void CMOVConversion::runOnFunction(BinaryFunction &Function) { |
181 | BinaryContext &BC = Function.getBinaryContext(); |
182 | bool Modified = false; |
183 | // Function-local stats |
184 | Stats Local; |
185 | // Traverse blocks in RPO, merging block with a converted cmov with its |
186 | // successor. |
187 | for (BinaryBasicBlock *BB : post_order(G: &Function)) { |
188 | uint64_t BBExecCount = BB->getKnownExecutionCount(); |
189 | if (BB->empty() || // The block must have instructions |
190 | BBExecCount == 0 || // must be hot |
191 | BB->succ_size() != 2 || // with two successors |
192 | BB->hasJumpTable()) // no jump table |
193 | continue; |
194 | |
195 | assert(BB->isValid() && "traversal internal error" ); |
196 | |
197 | // Check branch instruction |
198 | auto BranchInstrIter = BB->getLastNonPseudo(); |
199 | if (BranchInstrIter == BB->rend() || |
200 | !BC.MIB->isConditionalBranch(Inst: *BranchInstrIter)) |
201 | continue; |
202 | |
203 | // Check successors |
204 | BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc; |
205 | bool IsConditionalTaken; |
206 | if (!matchCFGSubgraph(BB&: *BB, ConditionalSucc, UnconditionalSucc, |
207 | IsConditionalTaken)) { |
208 | LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n" ); |
209 | continue; |
210 | } |
211 | |
212 | unsigned CC = BC.MIB->getCondCode(Inst: *BranchInstrIter); |
213 | if (!IsConditionalTaken) |
214 | CC = BC.MIB->getInvertedCondCode(CC); |
215 | // Check contents of the conditional block |
216 | if (!canConvertInstructions(BC, BB: *ConditionalSucc, CC)) |
217 | continue; |
218 | |
219 | int ConditionBias = calculateConditionBias(BB: *BB, ConditionalSucc: *ConditionalSucc); |
220 | int MispredictionRate = 0; |
221 | uint64_t MispredictionCount = 0; |
222 | std::tie(args&: MispredictionRate, args&: MispredictionCount) = |
223 | calculateMispredictionRate(BB: *BB); |
224 | |
225 | Local.StaticPossible++; |
226 | Local.DynamicPossible += BBExecCount; |
227 | Local.PossibleMP += MispredictionCount; |
228 | |
229 | // If the conditional successor is never executed, don't convert it |
230 | if (ConditionBias < opts::BiasThreshold) { |
231 | LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName() |
232 | << " bias = " << ConditionBias |
233 | << ", less than threshold " << opts::BiasThreshold |
234 | << '\n'); |
235 | continue; |
236 | } |
237 | |
238 | // Check the misprediction rate of a branch |
239 | if (MispredictionRate < opts::MispredictionThreshold) { |
240 | LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = " |
241 | << MispredictionRate << ", less than threshold " |
242 | << opts::MispredictionThreshold << '\n'); |
243 | continue; |
244 | } |
245 | |
246 | // remove conditional branch |
247 | BB->eraseInstruction(II: std::prev(x: BranchInstrIter.base())); |
248 | BB->removeAllSuccessors(); |
249 | // Convert instructions from the conditional successor into cmov's in BB. |
250 | convertMoves(BC, BB&: *ConditionalSucc, CC); |
251 | BB->addInstructions(Begin: ConditionalSucc->begin(), End: ConditionalSucc->end()); |
252 | ConditionalSucc->markValid(Valid: false); |
253 | |
254 | // RPO traversal guarantees that the successor is visited and merged if |
255 | // necessary. Merge the unconditional successor into the current block. |
256 | BB->addInstructions(Begin: UnconditionalSucc->begin(), End: UnconditionalSucc->end()); |
257 | UnconditionalSucc->moveAllSuccessorsTo(New: BB); |
258 | UnconditionalSucc->markValid(Valid: false); |
259 | Local.StaticPerformed++; |
260 | Local.DynamicPerformed += BBExecCount; |
261 | Local.RemovedMP += MispredictionCount; |
262 | Modified = true; |
263 | } |
264 | if (Modified) |
265 | Function.eraseInvalidBBs(); |
266 | if (opts::Verbosity > 1) { |
267 | BC.outs() << "BOLT-INFO: CMOVConversion: " << Function << ", " ; |
268 | Local.dumpTo(OS&: BC.outs()); |
269 | } |
270 | Global = Global + Local; |
271 | } |
272 | |
273 | Error CMOVConversion::runOnFunctions(BinaryContext &BC) { |
274 | for (auto &It : BC.getBinaryFunctions()) { |
275 | BinaryFunction &Function = It.second; |
276 | if (!shouldOptimize(BF: Function)) |
277 | continue; |
278 | runOnFunction(Function); |
279 | } |
280 | |
281 | BC.outs() << "BOLT-INFO: CMOVConversion total: " ; |
282 | Global.dumpTo(OS&: BC.outs()); |
283 | return Error::success(); |
284 | } |
285 | |
286 | } // end namespace bolt |
287 | } // end namespace llvm |
288 | |