| 1 | //===- bolt/Core/BinaryFunctionProfile.cpp - Profile processing -----------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file implements BinaryFunction member functions related to processing |
| 10 | // the execution profile. |
| 11 | // |
| 12 | //===----------------------------------------------------------------------===// |
| 13 | |
| 14 | #include "bolt/Core/BinaryBasicBlock.h" |
| 15 | #include "bolt/Core/BinaryFunction.h" |
| 16 | #include "llvm/Support/CommandLine.h" |
| 17 | #include "llvm/Support/Debug.h" |
| 18 | #include "llvm/Support/raw_ostream.h" |
| 19 | |
| 20 | #undef DEBUG_TYPE |
| 21 | #define DEBUG_TYPE "bolt-prof" |
| 22 | |
| 23 | using namespace llvm; |
| 24 | using namespace bolt; |
| 25 | |
| 26 | namespace opts { |
| 27 | |
| 28 | extern cl::OptionCategory BoltOptCategory; |
| 29 | |
| 30 | cl::opt<IndirectCallPromotionType> ICP( |
| 31 | "indirect-call-promotion" , cl::init(Val: ICP_NONE), |
| 32 | cl::desc("indirect call promotion" ), |
| 33 | cl::values( |
| 34 | clEnumValN(ICP_NONE, "none" , "do not perform indirect call promotion" ), |
| 35 | clEnumValN(ICP_CALLS, "calls" , "perform ICP on indirect calls" ), |
| 36 | clEnumValN(ICP_JUMP_TABLES, "jump-tables" , |
| 37 | "perform ICP on jump tables" ), |
| 38 | clEnumValN(ICP_ALL, "all" , "perform ICP on calls and jump tables" )), |
| 39 | cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
| 40 | |
| 41 | static cl::alias ICPAlias("icp" , |
| 42 | cl::desc("Alias for --indirect-call-promotion" ), |
| 43 | cl::aliasopt(ICP)); |
| 44 | |
| 45 | extern cl::opt<JumpTableSupportLevel> JumpTables; |
| 46 | |
| 47 | static cl::opt<bool> FixFuncCounts( |
| 48 | "fix-func-counts" , |
| 49 | cl::desc("adjust function counts based on basic blocks execution count" ), |
| 50 | cl::Hidden, cl::cat(BoltOptCategory)); |
| 51 | |
| 52 | static cl::opt<bool> FixBlockCounts( |
| 53 | "fix-block-counts" , |
| 54 | cl::desc("adjust block counts based on outgoing branch counts" ), |
| 55 | cl::init(Val: true), cl::Hidden, cl::cat(BoltOptCategory)); |
| 56 | |
| 57 | static cl::opt<bool> |
| 58 | InferFallThroughs("infer-fall-throughs" , |
| 59 | cl::desc("infer execution count for fall-through blocks" ), |
| 60 | cl::Hidden, cl::cat(BoltOptCategory)); |
| 61 | |
| 62 | } // namespace opts |
| 63 | |
| 64 | namespace llvm { |
| 65 | namespace bolt { |
| 66 | |
| 67 | void BinaryFunction::postProcessProfile() { |
| 68 | if (!hasValidProfile()) { |
| 69 | clearProfile(); |
| 70 | return; |
| 71 | } |
| 72 | |
| 73 | if (!(getProfileFlags() & PF_BRANCH)) |
| 74 | return; |
| 75 | |
| 76 | // If we have at least some branch data for the function indicate that it |
| 77 | // was executed. |
| 78 | if (opts::FixFuncCounts && ExecutionCount == 0) |
| 79 | ExecutionCount = 1; |
| 80 | |
| 81 | // Compute preliminary execution count for each basic block. |
| 82 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 83 | if ((!BB->isEntryPoint() && !BB->isLandingPad()) || |
| 84 | BB->ExecutionCount == BinaryBasicBlock::COUNT_NO_PROFILE) |
| 85 | BB->ExecutionCount = 0; |
| 86 | } |
| 87 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 88 | auto SuccBIIter = BB->branch_info_begin(); |
| 89 | for (BinaryBasicBlock *Succ : BB->successors()) { |
| 90 | // All incoming edges to the primary entry have been accounted for, thus |
| 91 | // we skip the update here. |
| 92 | if (SuccBIIter->Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
| 93 | Succ != BasicBlocks.front()) |
| 94 | Succ->setExecutionCount(Succ->getExecutionCount() + SuccBIIter->Count); |
| 95 | ++SuccBIIter; |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | // Fix for old profiles. |
| 100 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 101 | if (BB->size() != 1 || BB->succ_size() != 1) |
| 102 | continue; |
| 103 | |
| 104 | if (BB->getKnownExecutionCount() == 0) |
| 105 | continue; |
| 106 | |
| 107 | MCInst *Instr = BB->getFirstNonPseudoInstr(); |
| 108 | assert(Instr && "expected non-pseudo instr" ); |
| 109 | if (!BC.MIB->hasAnnotation(Inst: *Instr, Name: "NOP" )) |
| 110 | continue; |
| 111 | |
| 112 | BinaryBasicBlock *FTSuccessor = BB->getSuccessor(); |
| 113 | BinaryBasicBlock::BinaryBranchInfo &BI = BB->getBranchInfo(Succ: *FTSuccessor); |
| 114 | if (!BI.Count) { |
| 115 | BI.Count = BB->getKnownExecutionCount(); |
| 116 | FTSuccessor->setExecutionCount(FTSuccessor->getKnownExecutionCount() + |
| 117 | BI.Count); |
| 118 | } |
| 119 | } |
| 120 | |
| 121 | if (opts::FixBlockCounts) { |
| 122 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 123 | // Make sure that execution count of a block is at least the branch count |
| 124 | // of an incoming/outgoing jump. |
| 125 | auto SuccBIIter = BB->branch_info_begin(); |
| 126 | for (BinaryBasicBlock *Succ : BB->successors()) { |
| 127 | uint64_t Count = SuccBIIter->Count; |
| 128 | if (Count != BinaryBasicBlock::COUNT_NO_PROFILE && Count > 0) { |
| 129 | Succ->setExecutionCount(std::max(a: Succ->getExecutionCount(), b: Count)); |
| 130 | BB->setExecutionCount(std::max(a: BB->getExecutionCount(), b: Count)); |
| 131 | } |
| 132 | ++SuccBIIter; |
| 133 | } |
| 134 | // Make sure that execution count of a block is at least the number of |
| 135 | // function calls from the block. |
| 136 | for (MCInst &Inst : *BB) { |
| 137 | // Ignore non-call instruction |
| 138 | if (!BC.MIB->isCall(Inst)) |
| 139 | continue; |
| 140 | |
| 141 | auto CountAnnt = BC.MIB->tryGetAnnotationAs<uint64_t>(Inst, Name: "Count" ); |
| 142 | if (CountAnnt) |
| 143 | BB->setExecutionCount(std::max(a: BB->getExecutionCount(), b: *CountAnnt)); |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |
| 148 | if (opts::InferFallThroughs) |
| 149 | inferFallThroughCounts(); |
| 150 | |
| 151 | // Update profile information for jump tables based on CFG branch data. |
| 152 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 153 | const MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
| 154 | if (!LastInstr) |
| 155 | continue; |
| 156 | const uint64_t JTAddress = BC.MIB->getJumpTable(Inst: *LastInstr); |
| 157 | if (!JTAddress) |
| 158 | continue; |
| 159 | JumpTable *JT = getJumpTableContainingAddress(Address: JTAddress); |
| 160 | if (!JT) |
| 161 | continue; |
| 162 | |
| 163 | uint64_t TotalBranchCount = 0; |
| 164 | for (const BinaryBasicBlock::BinaryBranchInfo &BranchInfo : |
| 165 | BB->branch_info()) { |
| 166 | TotalBranchCount += BranchInfo.Count; |
| 167 | } |
| 168 | JT->Count += TotalBranchCount; |
| 169 | |
| 170 | if (opts::ICP < ICP_JUMP_TABLES && opts::JumpTables < JTS_AGGRESSIVE) |
| 171 | continue; |
| 172 | |
| 173 | if (JT->Counts.empty()) |
| 174 | JT->Counts.resize(new_size: JT->Entries.size()); |
| 175 | auto EI = JT->Entries.begin(); |
| 176 | uint64_t Delta = (JTAddress - JT->getAddress()) / JT->EntrySize; |
| 177 | EI += Delta; |
| 178 | while (EI != JT->Entries.end()) { |
| 179 | const BinaryBasicBlock *TargetBB = getBasicBlockForLabel(Label: *EI); |
| 180 | if (TargetBB) { |
| 181 | const BinaryBasicBlock::BinaryBranchInfo &BranchInfo = |
| 182 | BB->getBranchInfo(Succ: *TargetBB); |
| 183 | assert(Delta < JT->Counts.size()); |
| 184 | JT->Counts[Delta].Count += BranchInfo.Count; |
| 185 | JT->Counts[Delta].Mispreds += BranchInfo.MispredictedCount; |
| 186 | } |
| 187 | ++Delta; |
| 188 | ++EI; |
| 189 | // A label marks the start of another jump table. |
| 190 | if (JT->Labels.count(x: Delta * JT->EntrySize)) |
| 191 | break; |
| 192 | } |
| 193 | } |
| 194 | } |
| 195 | |
| 196 | void BinaryFunction::mergeProfileDataInto(BinaryFunction &BF) const { |
| 197 | // No reason to merge invalid or empty profiles into BF. |
| 198 | if (!hasValidProfile()) |
| 199 | return; |
| 200 | |
| 201 | // Update function execution count. |
| 202 | if (getExecutionCount() != BinaryFunction::COUNT_NO_PROFILE) |
| 203 | BF.setExecutionCount(BF.getKnownExecutionCount() + getExecutionCount()); |
| 204 | |
| 205 | // Since we are merging a valid profile, the new profile should be valid too. |
| 206 | // It has either already been valid, or it has been cleaned up. |
| 207 | BF.ProfileMatchRatio = 1.0f; |
| 208 | |
| 209 | // Update basic block and edge counts. |
| 210 | auto BBMergeI = BF.begin(); |
| 211 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 212 | BinaryBasicBlock *BBMerge = &*BBMergeI; |
| 213 | assert(getIndex(BB) == BF.getIndex(BBMerge)); |
| 214 | |
| 215 | // Update basic block count. |
| 216 | if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) { |
| 217 | BBMerge->setExecutionCount(BBMerge->getKnownExecutionCount() + |
| 218 | BB->getExecutionCount()); |
| 219 | } |
| 220 | |
| 221 | // Update edge count for successors of this basic block. |
| 222 | auto BBMergeSI = BBMerge->succ_begin(); |
| 223 | auto BIMergeI = BBMerge->branch_info_begin(); |
| 224 | auto BII = BB->branch_info_begin(); |
| 225 | for (const BinaryBasicBlock *BBSucc : BB->successors()) { |
| 226 | (void)BBSucc; |
| 227 | assert(getIndex(BBSucc) == BF.getIndex(*BBMergeSI)); |
| 228 | (void)BBMergeSI; |
| 229 | |
| 230 | // At this point no branch count should be set to COUNT_NO_PROFILE. |
| 231 | assert(BII->Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
| 232 | "unexpected unknown branch profile" ); |
| 233 | assert(BIMergeI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && |
| 234 | "unexpected unknown branch profile" ); |
| 235 | |
| 236 | BIMergeI->Count += BII->Count; |
| 237 | |
| 238 | // When we merge inferred and real fall-through branch data, the merged |
| 239 | // data is considered inferred. |
| 240 | if (BII->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED && |
| 241 | BIMergeI->MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { |
| 242 | BIMergeI->MispredictedCount += BII->MispredictedCount; |
| 243 | } else { |
| 244 | BIMergeI->MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; |
| 245 | } |
| 246 | |
| 247 | ++BBMergeSI; |
| 248 | ++BII; |
| 249 | ++BIMergeI; |
| 250 | } |
| 251 | assert(BBMergeSI == BBMerge->succ_end()); |
| 252 | |
| 253 | ++BBMergeI; |
| 254 | } |
| 255 | assert(BBMergeI == BF.end()); |
| 256 | |
| 257 | // Merge jump tables profile info. |
| 258 | auto JTMergeI = BF.JumpTables.begin(); |
| 259 | for (const auto &JTEntry : JumpTables) { |
| 260 | if (JTMergeI->second->Counts.empty()) |
| 261 | JTMergeI->second->Counts.resize(new_size: JTEntry.second->Counts.size()); |
| 262 | auto CountMergeI = JTMergeI->second->Counts.begin(); |
| 263 | for (const JumpTable::JumpInfo &JI : JTEntry.second->Counts) { |
| 264 | CountMergeI->Count += JI.Count; |
| 265 | CountMergeI->Mispreds += JI.Mispreds; |
| 266 | ++CountMergeI; |
| 267 | } |
| 268 | assert(CountMergeI == JTMergeI->second->Counts.end()); |
| 269 | |
| 270 | ++JTMergeI; |
| 271 | } |
| 272 | assert(JTMergeI == BF.JumpTables.end()); |
| 273 | } |
| 274 | |
| 275 | void BinaryFunction::inferFallThroughCounts() { |
| 276 | // Work on a basic block at a time, propagating frequency information |
| 277 | // forwards. |
| 278 | // It is important to walk in the layout order. |
| 279 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 280 | const uint64_t BBExecCount = BB->getExecutionCount(); |
| 281 | |
| 282 | // Propagate this information to successors, filling in fall-through edges |
| 283 | // with frequency information |
| 284 | if (BB->succ_size() == 0) |
| 285 | continue; |
| 286 | |
| 287 | // Calculate frequency of outgoing branches from this node according to |
| 288 | // LBR data. |
| 289 | uint64_t ReportedBranches = 0; |
| 290 | for (const BinaryBasicBlock::BinaryBranchInfo &SuccBI : BB->branch_info()) |
| 291 | if (SuccBI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) |
| 292 | ReportedBranches += SuccBI.Count; |
| 293 | |
| 294 | // Get taken count of conditional tail call if the block ends with one. |
| 295 | uint64_t CTCTakenCount = 0; |
| 296 | const MCInst *CTCInstr = BB->getLastNonPseudoInstr(); |
| 297 | if (CTCInstr && BC.MIB->getConditionalTailCall(Inst: *CTCInstr)) { |
| 298 | CTCTakenCount = BC.MIB->getAnnotationWithDefault<uint64_t>( |
| 299 | Inst: *CTCInstr, Name: "CTCTakenCount" ); |
| 300 | } |
| 301 | |
| 302 | // Calculate frequency of throws from this node according to LBR data |
| 303 | // for branching into associated landing pads. Since it is possible |
| 304 | // for a landing pad to be associated with more than one basic blocks, |
| 305 | // we may overestimate the frequency of throws for such blocks. |
| 306 | uint64_t ReportedThrows = 0; |
| 307 | for (const BinaryBasicBlock *LP : BB->landing_pads()) |
| 308 | ReportedThrows += LP->getExecutionCount(); |
| 309 | |
| 310 | const uint64_t TotalReportedJumps = |
| 311 | ReportedBranches + CTCTakenCount + ReportedThrows; |
| 312 | |
| 313 | // Infer the frequency of the fall-through edge, representing not taking the |
| 314 | // branch. |
| 315 | uint64_t Inferred = 0; |
| 316 | if (BBExecCount > TotalReportedJumps) |
| 317 | Inferred = BBExecCount - TotalReportedJumps; |
| 318 | |
| 319 | LLVM_DEBUG( |
| 320 | if (BBExecCount < TotalReportedJumps) dbgs() |
| 321 | << "Fall-through inference is slightly inconsistent. " |
| 322 | "exec frequency is less than the outgoing edges frequency (" |
| 323 | << BBExecCount << " < " << ReportedBranches |
| 324 | << ") for BB at offset 0x" |
| 325 | << Twine::utohexstr(getAddress() + BB->getOffset()) << '\n';); |
| 326 | |
| 327 | if (BB->succ_size() <= 2) { |
| 328 | // Skip if the last instruction is an unconditional jump. |
| 329 | const MCInst *LastInstr = BB->getLastNonPseudoInstr(); |
| 330 | if (LastInstr && (BC.MIB->isUnconditionalBranch(Inst: *LastInstr) || |
| 331 | BC.MIB->isIndirectBranch(Inst: *LastInstr))) |
| 332 | continue; |
| 333 | // If there is an FT it will be the last successor. |
| 334 | auto &SuccBI = *BB->branch_info_rbegin(); |
| 335 | auto &Succ = *BB->succ_rbegin(); |
| 336 | if (SuccBI.Count == 0) { |
| 337 | SuccBI.Count = Inferred; |
| 338 | SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED; |
| 339 | Succ->ExecutionCount = |
| 340 | std::max(a: Succ->getKnownExecutionCount(), b: Inferred); |
| 341 | } |
| 342 | } |
| 343 | } |
| 344 | } |
| 345 | |
| 346 | void BinaryFunction::clearProfile() { |
| 347 | // Keep function execution profile the same. Only clear basic block and edge |
| 348 | // counts. |
| 349 | for (BinaryBasicBlock *BB : BasicBlocks) { |
| 350 | BB->ExecutionCount = 0; |
| 351 | for (BinaryBasicBlock::BinaryBranchInfo &BI : BB->branch_info()) { |
| 352 | BI.Count = 0; |
| 353 | BI.MispredictedCount = 0; |
| 354 | } |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | } // namespace bolt |
| 359 | } // namespace llvm |
| 360 | |