| 1 | //===- bolt/Passes/CMOVConversion.cpp ------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements the CMOV conversion pass. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "bolt/Passes/CMOVConversion.h" |
| 14 | #include "bolt/Core/BinaryBasicBlock.h" |
| 15 | #include "bolt/Core/BinaryContext.h" |
| 16 | #include "bolt/Utils/CommandLineOpts.h" |
| 17 | #include "llvm/ADT/PostOrderIterator.h" |
| 18 | #include "llvm/Support/CommandLine.h" |
| 19 | #include "llvm/Support/ErrorHandling.h" |
| 20 | |
| 21 | #define DEBUG_TYPE "cmov" |
| 22 | |
| 23 | using namespace llvm; |
| 24 | |
| 25 | namespace opts { |
| 26 | |
| 27 | extern cl::OptionCategory BoltOptCategory; |
| 28 | |
| 29 | static cl::opt<int> BiasThreshold( |
| 30 | "cmov-conversion-bias-threshold" , |
| 31 | cl::desc("minimum condition bias (pct) to perform a CMOV conversion, " |
| 32 | "-1 to not account bias" ), |
| 33 | cl::ReallyHidden, cl::init(Val: 1), cl::cat(BoltOptCategory)); |
| 34 | |
| 35 | static cl::opt<int> MispredictionThreshold( |
| 36 | "cmov-conversion-misprediction-threshold" , |
| 37 | cl::desc("minimum misprediction rate (pct) to perform a CMOV conversion, " |
| 38 | "-1 to not account misprediction rate" ), |
| 39 | cl::ReallyHidden, cl::init(Val: 5), cl::cat(BoltOptCategory)); |
| 40 | |
| 41 | static cl::opt<bool> ConvertStackMemOperand( |
| 42 | "cmov-conversion-convert-stack-mem-operand" , |
| 43 | cl::desc("convert moves with stack memory operand (potentially unsafe)" ), |
| 44 | cl::ReallyHidden, cl::init(Val: false), cl::cat(BoltOptCategory)); |
| 45 | |
| 46 | static cl::opt<bool> ConvertBasePtrStackMemOperand( |
| 47 | "cmov-conversion-convert-rbp-stack-mem-operand" , |
| 48 | cl::desc("convert moves with rbp stack memory operand (unsafe, must be off " |
| 49 | "for binaries compiled with -fomit-frame-pointer)" ), |
| 50 | cl::ReallyHidden, cl::init(Val: false), cl::cat(BoltOptCategory)); |
| 51 | |
| 52 | } // namespace opts |
| 53 | |
| 54 | namespace llvm { |
| 55 | namespace bolt { |
| 56 | |
| 57 | // Return true if the CFG conforms to the following subgraph: |
| 58 | // Predecessor |
| 59 | // / \ |
| 60 | // | RHS |
| 61 | // \ / |
| 62 | // LHS |
| 63 | // Caller guarantees that LHS and RHS share the same predecessor. |
| 64 | bool isIfThenSubgraph(const BinaryBasicBlock &LHS, |
| 65 | const BinaryBasicBlock &RHS) { |
| 66 | if (LHS.pred_size() != 2 || RHS.pred_size() != 1) |
| 67 | return false; |
| 68 | |
| 69 | // Sanity check |
| 70 | BinaryBasicBlock *Predecessor = *RHS.pred_begin(); |
| 71 | assert(Predecessor && LHS.isPredecessor(Predecessor) && "invalid subgraph" ); |
| 72 | (void)Predecessor; |
| 73 | |
| 74 | if (!LHS.isPredecessor(BB: &RHS)) |
| 75 | return false; |
| 76 | if (RHS.succ_size() != 1) |
| 77 | return false; |
| 78 | return true; |
| 79 | } |
| 80 | |
| 81 | bool matchCFGSubgraph(BinaryBasicBlock &BB, BinaryBasicBlock *&ConditionalSucc, |
| 82 | BinaryBasicBlock *&UnconditionalSucc, |
| 83 | bool &IsConditionalTaken) { |
| 84 | BinaryBasicBlock *TakenSucc = BB.getConditionalSuccessor(Condition: true); |
| 85 | BinaryBasicBlock *FallthroughSucc = BB.getConditionalSuccessor(Condition: false); |
| 86 | bool IsIfThenTaken = isIfThenSubgraph(LHS: *FallthroughSucc, RHS: *TakenSucc); |
| 87 | bool IsIfThenFallthrough = isIfThenSubgraph(LHS: *TakenSucc, RHS: *FallthroughSucc); |
| 88 | if (!IsIfThenFallthrough && !IsIfThenTaken) |
| 89 | return false; |
| 90 | assert((!IsIfThenFallthrough || !IsIfThenTaken) && "Invalid subgraph" ); |
| 91 | |
| 92 | // Output parameters |
| 93 | ConditionalSucc = IsIfThenTaken ? TakenSucc : FallthroughSucc; |
| 94 | UnconditionalSucc = IsIfThenTaken ? FallthroughSucc : TakenSucc; |
| 95 | IsConditionalTaken = IsIfThenTaken; |
| 96 | return true; |
| 97 | } |
| 98 | |
| 99 | // Return true if basic block instructions can be converted into cmov(s). |
| 100 | bool canConvertInstructions(const BinaryContext &BC, const BinaryBasicBlock &BB, |
| 101 | unsigned CC) { |
| 102 | if (BB.empty()) |
| 103 | return false; |
| 104 | const MCInst *LastInst = BB.getLastNonPseudoInstr(); |
| 105 | // Only pseudo instructions, can't be converted into CMOV |
| 106 | if (LastInst == nullptr) |
| 107 | return false; |
| 108 | for (const MCInst &Inst : BB) { |
| 109 | if (BC.MIB->isPseudo(Inst)) |
| 110 | continue; |
| 111 | // Unconditional branch as a last instruction is OK |
| 112 | if (&Inst == LastInst && BC.MIB->isUnconditionalBranch(Inst)) |
| 113 | continue; |
| 114 | MCInst Cmov(Inst); |
| 115 | // GPR move is OK |
| 116 | if (!BC.MIB->convertMoveToConditionalMove( |
| 117 | Inst&: Cmov, CC, AllowStackMemOp: opts::ConvertStackMemOperand, |
| 118 | AllowBasePtrStackMemOp: opts::ConvertBasePtrStackMemOperand)) { |
| 119 | LLVM_DEBUG({ |
| 120 | dbgs() << BB.getName() << ": can't convert instruction " ; |
| 121 | BC.printInstruction(dbgs(), Cmov); |
| 122 | }); |
| 123 | return false; |
| 124 | } |
| 125 | } |
| 126 | return true; |
| 127 | } |
| 128 | |
| 129 | void convertMoves(const BinaryContext &BC, BinaryBasicBlock &BB, unsigned CC) { |
| 130 | for (auto II = BB.begin(), IE = BB.end(); II != IE; ++II) { |
| 131 | if (BC.MIB->isPseudo(Inst: *II)) |
| 132 | continue; |
| 133 | if (BC.MIB->isUnconditionalBranch(Inst: *II)) { |
| 134 | // XXX: this invalidates II but we return immediately |
| 135 | BB.eraseInstruction(II); |
| 136 | return; |
| 137 | } |
| 138 | bool Result = BC.MIB->convertMoveToConditionalMove( |
| 139 | Inst&: *II, CC, AllowStackMemOp: opts::ConvertStackMemOperand, |
| 140 | AllowBasePtrStackMemOp: opts::ConvertBasePtrStackMemOperand); |
| 141 | assert(Result && "unexpected instruction" ); |
| 142 | (void)Result; |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | // Returns misprediction rate if the profile data is available, -1 otherwise. |
| 147 | std::pair<int, uint64_t> |
| 148 | calculateMispredictionRate(const BinaryBasicBlock &BB) { |
| 149 | uint64_t TotalExecCount = 0; |
| 150 | uint64_t TotalMispredictionCount = 0; |
| 151 | for (auto BI : BB.branch_info()) { |
| 152 | TotalExecCount += BI.Count; |
| 153 | if (BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) |
| 154 | TotalMispredictionCount += BI.MispredictedCount; |
| 155 | } |
| 156 | if (!TotalExecCount) |
| 157 | return {-1, TotalMispredictionCount}; |
| 158 | return {100.0f * TotalMispredictionCount / TotalExecCount, |
| 159 | TotalMispredictionCount}; |
| 160 | } |
| 161 | |
| 162 | // Returns conditional succ bias if the profile is available, -1 otherwise. |
| 163 | int calculateConditionBias(const BinaryBasicBlock &BB, |
| 164 | const BinaryBasicBlock &ConditionalSucc) { |
| 165 | if (auto BranchStats = BB.getBranchStats(Succ: &ConditionalSucc)) |
| 166 | return BranchStats->first; |
| 167 | return -1; |
| 168 | } |
| 169 | |
| 170 | void CMOVConversion::Stats::dumpTo(raw_ostream &OS) { |
| 171 | OS << "converted static " << StaticPerformed << "/" << StaticPossible |
| 172 | << formatv(Fmt: " ({0:P}) " , Vals: getStaticRatio()) |
| 173 | << "hammock(s) into CMOV sequences, with dynamic execution count " |
| 174 | << DynamicPerformed << "/" << DynamicPossible |
| 175 | << formatv(Fmt: " ({0:P}), " , Vals: getDynamicRatio()) << "saving " << RemovedMP |
| 176 | << "/" << PossibleMP << formatv(Fmt: " ({0:P}) " , Vals: getMPRatio()) |
| 177 | << "mispredictions\n" ; |
| 178 | } |
| 179 | |
| 180 | void CMOVConversion::runOnFunction(BinaryFunction &Function) { |
| 181 | BinaryContext &BC = Function.getBinaryContext(); |
| 182 | bool Modified = false; |
| 183 | // Function-local stats |
| 184 | Stats Local; |
| 185 | // Traverse blocks in RPO, merging block with a converted cmov with its |
| 186 | // successor. |
| 187 | for (BinaryBasicBlock *BB : post_order(G: &Function)) { |
| 188 | uint64_t BBExecCount = BB->getKnownExecutionCount(); |
| 189 | if (BB->empty() || // The block must have instructions |
| 190 | BBExecCount == 0 || // must be hot |
| 191 | BB->succ_size() != 2 || // with two successors |
| 192 | BB->hasJumpTable()) // no jump table |
| 193 | continue; |
| 194 | |
| 195 | assert(BB->isValid() && "traversal internal error" ); |
| 196 | |
| 197 | // Check branch instruction |
| 198 | auto BranchInstrIter = BB->getLastNonPseudo(); |
| 199 | if (BranchInstrIter == BB->rend() || |
| 200 | !BC.MIB->isConditionalBranch(Inst: *BranchInstrIter)) |
| 201 | continue; |
| 202 | |
| 203 | // Check successors |
| 204 | BinaryBasicBlock *ConditionalSucc, *UnconditionalSucc; |
| 205 | bool IsConditionalTaken; |
| 206 | if (!matchCFGSubgraph(BB&: *BB, ConditionalSucc, UnconditionalSucc, |
| 207 | IsConditionalTaken)) { |
| 208 | LLVM_DEBUG(dbgs() << BB->getName() << ": couldn't match hammock\n" ); |
| 209 | continue; |
| 210 | } |
| 211 | |
| 212 | unsigned CC = BC.MIB->getCondCode(Inst: *BranchInstrIter); |
| 213 | if (!IsConditionalTaken) |
| 214 | CC = BC.MIB->getInvertedCondCode(CC); |
| 215 | // Check contents of the conditional block |
| 216 | if (!canConvertInstructions(BC, BB: *ConditionalSucc, CC)) |
| 217 | continue; |
| 218 | |
| 219 | int ConditionBias = calculateConditionBias(BB: *BB, ConditionalSucc: *ConditionalSucc); |
| 220 | int MispredictionRate = 0; |
| 221 | uint64_t MispredictionCount = 0; |
| 222 | std::tie(args&: MispredictionRate, args&: MispredictionCount) = |
| 223 | calculateMispredictionRate(BB: *BB); |
| 224 | |
| 225 | Local.StaticPossible++; |
| 226 | Local.DynamicPossible += BBExecCount; |
| 227 | Local.PossibleMP += MispredictionCount; |
| 228 | |
| 229 | // If the conditional successor is never executed, don't convert it |
| 230 | if (ConditionBias < opts::BiasThreshold) { |
| 231 | LLVM_DEBUG(dbgs() << BB->getName() << "->" << ConditionalSucc->getName() |
| 232 | << " bias = " << ConditionBias |
| 233 | << ", less than threshold " << opts::BiasThreshold |
| 234 | << '\n'); |
| 235 | continue; |
| 236 | } |
| 237 | |
| 238 | // Check the misprediction rate of a branch |
| 239 | if (MispredictionRate < opts::MispredictionThreshold) { |
| 240 | LLVM_DEBUG(dbgs() << BB->getName() << " misprediction rate = " |
| 241 | << MispredictionRate << ", less than threshold " |
| 242 | << opts::MispredictionThreshold << '\n'); |
| 243 | continue; |
| 244 | } |
| 245 | |
| 246 | // remove conditional branch |
| 247 | BB->eraseInstruction(II: std::prev(x: BranchInstrIter.base())); |
| 248 | BB->removeAllSuccessors(); |
| 249 | // Convert instructions from the conditional successor into cmov's in BB. |
| 250 | convertMoves(BC, BB&: *ConditionalSucc, CC); |
| 251 | BB->addInstructions(Begin: ConditionalSucc->begin(), End: ConditionalSucc->end()); |
| 252 | ConditionalSucc->markValid(Valid: false); |
| 253 | |
| 254 | // RPO traversal guarantees that the successor is visited and merged if |
| 255 | // necessary. Merge the unconditional successor into the current block. |
| 256 | BB->addInstructions(Begin: UnconditionalSucc->begin(), End: UnconditionalSucc->end()); |
| 257 | UnconditionalSucc->moveAllSuccessorsTo(New: BB); |
| 258 | UnconditionalSucc->markValid(Valid: false); |
| 259 | Local.StaticPerformed++; |
| 260 | Local.DynamicPerformed += BBExecCount; |
| 261 | Local.RemovedMP += MispredictionCount; |
| 262 | Modified = true; |
| 263 | } |
| 264 | if (Modified) |
| 265 | Function.eraseInvalidBBs(); |
| 266 | if (opts::Verbosity > 1) { |
| 267 | BC.outs() << "BOLT-INFO: CMOVConversion: " << Function << ", " ; |
| 268 | Local.dumpTo(OS&: BC.outs()); |
| 269 | } |
| 270 | Global = Global + Local; |
| 271 | } |
| 272 | |
| 273 | Error CMOVConversion::runOnFunctions(BinaryContext &BC) { |
| 274 | for (auto &It : BC.getBinaryFunctions()) { |
| 275 | BinaryFunction &Function = It.second; |
| 276 | if (!shouldOptimize(BF: Function)) |
| 277 | continue; |
| 278 | runOnFunction(Function); |
| 279 | } |
| 280 | |
| 281 | BC.outs() << "BOLT-INFO: CMOVConversion total: " ; |
| 282 | Global.dumpTo(OS&: BC.outs()); |
| 283 | return Error::success(); |
| 284 | } |
| 285 | |
| 286 | } // end namespace bolt |
| 287 | } // end namespace llvm |
| 288 | |