//===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SplitFunctions pass.
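// It partitions the basic blocks of each function into hot and cold (and,
// for the cdsplit strategy, warm) fragments according to the selected
// splitting strategy.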
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace {
class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
public:
  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
      : cl::parser<bool>(O) {}

  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
    if (Arg == "2" || Arg == "3") {
      Value = true;
      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
                        "for option -{1} is deprecated\n",
                        Arg, ArgName);
      return false;
    }
    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
  }
};
} // namespace

namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> SplitEH;
extern cl::opt<unsigned> ExecutionCountThreshold;
extern cl::opt<uint32_t> RandomSeed;

static cl::opt<bool> AggressiveSplitting(
    "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitAlignThreshold(
    "split-align-threshold",
    cl::desc("when deciding to split a function, apply this alignment "
             "while doing the size comparison (see -split-threshold). "
             "Default value: 2."),
    cl::init(2),

    cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
    SplitFunctions("split-functions",
                   cl::desc("split functions into fragments"),
                   cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitThreshold(
    "split-threshold",
    cl::desc("split function only if its main size is reduced by more than "
             "given amount of bytes. Default value: 0, i.e. split iff the "
             "size is reduced. Note that on some architectures the size can "
             "increase after splitting."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<SplitFunctionsStrategy> SplitStrategy(
    "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
    cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
                          "split each function into a hot and cold fragment "
                          "using profiling information")),
    cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
                          "split each function into a hot, warm, and cold "
                          "fragment using profiling information")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::Random2, "random2",
        "split each function into a hot and cold fragment at a randomly chosen "
        "split point (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::RandomN, "randomN",
| 103 | "split each function into N fragments at a randomly chosen split " |
| 104 | "points (ignoring any available profiling information)" )), |
    cl::values(clEnumValN(
        SplitFunctionsStrategy::All, "all",
        "split all basic blocks of each function into fragments such that each "
        "fragment contains exactly a single basic block")),
    cl::desc("strategy used to partition blocks into fragments"),
    cl::cat(BoltOptCategory));

static cl::opt<double> CallScale(
    "call-scale",
    cl::desc("Call score scale coefficient (when --split-strategy=cdsplit)"),
    cl::init(0.95), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<double>
    CallPower("call-power",
              cl::desc("Call score power (when --split-strategy=cdsplit)"),
              cl::init(0.05), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<double>
    JumpPower("jump-power",
              cl::desc("Jump score power (when --split-strategy=cdsplit)"),
              cl::init(0.15), cl::ReallyHidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace {
bool hasFullProfile(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
  });
}

bool allBlocksCold(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() == 0;
  });
}

struct SplitProfile2 final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
      if (BB->getExecutionCount() == 0)
        BB->setFragmentNum(FragmentNum::cold());
    }
  }
};

struct SplitCacheDirected final : public SplitStrategy {
  BinaryContext &BC;
  using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;

  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

  explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {
    initializeAuxiliaryVariables();
    buildCallGraph();
  }

  // When some functions are hot-warm split and others are hot-warm-cold split,
  // we do not want to change the fragment numbers of the blocks in the
  // hot-warm split functions.
  bool compactFragments() override { return false; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    BasicBlockOrder BlockOrder(Start, End);
    BinaryFunction &BF = *BlockOrder.front()->getFunction();
    // No need to re-split small functions.
    if (BlockOrder.size() <= 2)
      return;

    size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
    assert(BestSplitIndex < BlockOrder.size());

    // Assign fragments based on the computed best split index.
    // All basic blocks with index up to the best split index become hot.
    // All remaining blocks become warm or cold depending on whether their
    // count is greater than zero.
    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
      BinaryBasicBlock *BB = BlockOrder[Index];
      if (Index <= BestSplitIndex)
        BB->setFragmentNum(FragmentNum::main());
      else
        BB->setFragmentNum(BB->getKnownExecutionCount() > 0
                               ? FragmentNum::warm()
                               : FragmentNum::cold());
    }
  }

private:
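  // Byte distance between a call site and its target, together with the
  // execution count of that call, as used by the splitting score.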
  struct CallInfo {
    size_t Length;
    size_t Count;
  };

  struct SplitScore {
    size_t SplitIndex = size_t(-1);
    size_t HotSizeReduction = 0;
    double LocalScore = 0;
    double CoverCallScore = 0;

    double sum() const { return LocalScore + CoverCallScore; }
  };

  // Auxiliary variables used by the algorithm.
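  // GlobalIndices assigns each considered basic block a unique index across
  // all functions; BBSizes and BBOffsets cache every block's estimated size
  // and hot-section offset after hot-cold splitting.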
  size_t TotalNumBlocks{0};
  size_t OrigHotSectionSize{0};
  DenseMap<const BinaryBasicBlock *, size_t> GlobalIndices;
  DenseMap<const BinaryBasicBlock *, size_t> BBSizes;
  DenseMap<const BinaryBasicBlock *, size_t> BBOffsets;

  // Call graph.
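  // Callers[I] holds the blocks that call the block with global index I;
  // Callees[I] holds the entry blocks of the functions called from it.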
  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callers;
  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callees;

  bool shouldConsiderForCallGraph(const BinaryFunction &BF) {
    // Only a subset of the functions in the binary will be considered
    // for initializing auxiliary variables and building call graph.
    return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty();
  }

  void initializeAuxiliaryVariables() {
    for (BinaryFunction *BF : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*BF))
        continue;

      // Calculate the size of each BB after hot-cold splitting.
      // This populates BinaryBasicBlock::OutputAddressRange which
      // can be used to compute the size of each BB.
      BC.calculateEmittedSize(*BF, /*FixBranches=*/true);

      for (BinaryBasicBlock *BB : BF->getLayout().blocks()) {
        // Unique global index.
        GlobalIndices[BB] = TotalNumBlocks;
        TotalNumBlocks++;

        // Block size after hot-cold splitting.
        BBSizes[BB] = BB->getOutputSize();

        // Hot block offset after hot-cold splitting.
        BBOffsets[BB] = OrigHotSectionSize;
        if (!BB->isSplit())
          OrigHotSectionSize += BBSizes[BB];
      }
    }
  }

  void buildCallGraph() {
    Callers.resize(TotalNumBlocks);
    Callees.resize(TotalNumBlocks);
    for (const BinaryFunction *SrcFunction : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*SrcFunction))
        continue;

      for (BinaryBasicBlock &SrcBB : SrcFunction->blocks()) {
        // Skip blocks that are not executed
        if (SrcBB.getKnownExecutionCount() == 0)
          continue;

        // Find call instructions and extract target symbols from each one
        for (const MCInst &Inst : SrcBB) {
          if (!BC.MIB->isCall(Inst))
            continue;

          // Call info
          const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
          // Ignore calls w/o information
          if (!DstSym)
            continue;

          const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym);
          // Ignore calls that do not have a valid target, but do not ignore
          // recursive calls, because caller block could be moved to warm.
          if (!DstFunction || DstFunction->getLayout().block_empty())
            continue;

          const BinaryBasicBlock *DstBB = &(DstFunction->front());

          // Record the call only if DstBB is also in functions to consider for
          // call graph.
          if (GlobalIndices.contains(DstBB)) {
            Callers[GlobalIndices[DstBB]].push_back(&SrcBB);
            Callees[GlobalIndices[&SrcBB]].push_back(DstBB);
          }
        }
      }
    }
  }

  /// Populate BinaryBasicBlock::OutputAddressRange with estimated basic block
  /// start and end addresses for hot and warm basic blocks, assuming hot-warm
  /// splitting happens at \p SplitIndex. Also return estimated end addresses
  /// of the hot fragment before and after splitting.
  /// The estimations take into account the potential addition of branch
  /// instructions due to split fall through branches as well as the need to
  /// use longer branch instructions for split (un)conditional branches.
  std::pair<size_t, size_t>
  estimatePostSplitBBAddress(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex) {
    assert(SplitIndex < BlockOrder.size() && "Invalid split index");

    // Update function layout assuming hot-warm splitting at SplitIndex.
    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
      BinaryBasicBlock *BB = BlockOrder[Index];
      if (BB->getFragmentNum() == FragmentNum::cold())
        break;
      BB->setFragmentNum(Index <= SplitIndex ? FragmentNum::main()
                                             : FragmentNum::warm());
    }
    BinaryFunction *BF = BlockOrder[0]->getFunction();
    BF->getLayout().update(BlockOrder);
    // Populate BB.OutputAddressRange under the updated layout.
    BC.calculateEmittedSize(*BF);

    // Populate BB.OutputAddressRange with estimated new start and end addresses
    // and compute the old end address of the hot section and the new end
    // address of the hot section.
    size_t OldHotEndAddr{0};
    size_t NewHotEndAddr{0};
    size_t CurrentAddr = BBOffsets[BlockOrder[0]];
    for (BinaryBasicBlock *BB : BlockOrder) {
      // We only care about new addresses of blocks in hot/warm.
      if (BB->getFragmentNum() == FragmentNum::cold())
        break;
      const size_t NewSize = BB->getOutputSize();
      BB->setOutputStartAddress(CurrentAddr);
      CurrentAddr += NewSize;
      BB->setOutputEndAddress(CurrentAddr);
      if (BB->getLayoutIndex() == SplitIndex) {
        NewHotEndAddr = CurrentAddr;
        // Approximate the start address of the warm fragment of the current
        // function using the original hot section size.
        CurrentAddr = OrigHotSectionSize;
      }
      OldHotEndAddr = BBOffsets[BB] + BBSizes[BB];
    }
    return std::make_pair(OldHotEndAddr, NewHotEndAddr);
  }

  /// Get a collection of "shortenable" calls, that is, calls of type X->Y
  /// when the function order is [... X ... BF ... Y ...].
  /// If the hot fragment size of BF is reduced, then such calls are guaranteed
  /// to get shorter by the reduced hot fragment size.
  std::vector<CallInfo> extractCoverCalls(const BinaryFunction &BF) {
    // Record the length and the count of the calls that can be shortened
    std::vector<CallInfo> CoverCalls;
    if (opts::CallScale == 0)
      return CoverCalls;

    const BinaryFunction *ThisBF = &BF;
    const BinaryBasicBlock *ThisBB = &(ThisBF->front());
    const size_t ThisGI = GlobalIndices[ThisBB];

    for (const BinaryFunction *DstBF : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*DstBF))
        continue;

      const BinaryBasicBlock *DstBB = &(DstBF->front());
      if (DstBB->getKnownExecutionCount() == 0)
        continue;

      const size_t DstGI = GlobalIndices[DstBB];
      for (const BinaryBasicBlock *SrcBB : Callers[DstGI]) {
        const BinaryFunction *SrcBF = SrcBB->getFunction();
        if (ThisBF == SrcBF)
          continue;

        const size_t CallCount = SrcBB->getKnownExecutionCount();

        const size_t SrcGI = GlobalIndices[SrcBB];

        const bool IsCoverCall = (SrcGI < ThisGI && ThisGI < DstGI) ||
                                 (DstGI <= ThisGI && ThisGI < SrcGI);
        if (!IsCoverCall)
          continue;

        const size_t SrcBBEndAddr = BBOffsets[SrcBB] + BBSizes[SrcBB];
        const size_t DstBBStartAddr = BBOffsets[DstBB];
        const size_t CallLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        const CallInfo CI{CallLength, CallCount};
        CoverCalls.emplace_back(CI);
      }
    }
    return CoverCalls;
  }

  /// Compute the edge score of a call edge.
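  /// The score equals CallScale * CallCount / (CallLength + 1) ^ CallPower,
  /// so frequent, short calls score highest.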
  double computeCallScore(uint64_t CallCount, size_t CallLength) {
    // Increase call lengths by 1 to avoid raising 0 to a negative power.
    return opts::CallScale * static_cast<double>(CallCount) /
           std::pow(static_cast<double>(CallLength + 1), opts::CallPower);
  }

  /// Compute the edge score of a jump (branch) edge.
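  /// The score equals JumpCount / (JumpLength + 1) ^ JumpPower.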
  double computeJumpScore(uint64_t JumpCount, size_t JumpLength) {
    // Increase jump lengths by 1 to avoid raising 0 to a negative power.
    return static_cast<double>(JumpCount) /
           std::pow(static_cast<double>(JumpLength + 1), opts::JumpPower);
  }

  /// Compute sum of scores over jumps within \p BlockOrder given \p SplitIndex.
  /// Increment Score.LocalScore in place by the sum.
  void computeJumpScore(const BasicBlockOrder &BlockOrder,
                        const size_t SplitIndex, SplitScore &Score) {

    for (const BinaryBasicBlock *SrcBB : BlockOrder) {
      if (SrcBB->getKnownExecutionCount() == 0)
        continue;

      const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second;

      for (const auto Pair : zip(SrcBB->successors(), SrcBB->branch_info())) {
        const BinaryBasicBlock *DstBB = std::get<0>(Pair);
        const BinaryBasicBlock::BinaryBranchInfo &Branch = std::get<1>(Pair);
        const size_t JumpCount = Branch.Count;

        if (JumpCount == 0)
          continue;

        const size_t DstBBStartAddr = DstBB->getOutputAddressRange().first;
        const size_t NewJumpLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        Score.LocalScore += computeJumpScore(JumpCount, NewJumpLength);
      }
    }
  }

  /// Compute sum of scores over calls originating in the current function
  /// given \p SplitIndex. Increment Score.LocalScore in place by the sum.
  void computeLocalCallScore(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex, SplitScore &Score) {
    if (opts::CallScale == 0)
      return;

    // Global index of the last block in the current function.
    // This is later used to determine whether a call originating in the
    // current function targets a function that comes after the current
    // function.
    const size_t LastGlobalIndex = GlobalIndices[BlockOrder.back()];

    // The length of calls originating in the input function can increase /
    // decrease depending on the splitting decision.
    for (const BinaryBasicBlock *SrcBB : BlockOrder) {
      const size_t CallCount = SrcBB->getKnownExecutionCount();
      // If SrcBB does not call any functions, skip it.
      if (CallCount == 0)
        continue;

      // Obtain an estimate on the end address of the src basic block
      // after splitting at SplitIndex.
      const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second;

      for (const BinaryBasicBlock *DstBB : Callees[GlobalIndices[SrcBB]]) {
        // Obtain an estimate on the start address of the dst basic block
        // after splitting at SplitIndex. If DstBB is in a function before
        // the current function, then its start address remains unchanged.
        size_t DstBBStartAddr = BBOffsets[DstBB];
        // If DstBB is in a function after the current function, then its
        // start address should be adjusted based on the reduction in hot size.
        if (GlobalIndices[DstBB] > LastGlobalIndex) {
          assert(DstBBStartAddr >= Score.HotSizeReduction);
          DstBBStartAddr -= Score.HotSizeReduction;
        }
        const size_t NewCallLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        Score.LocalScore += computeCallScore(CallCount, NewCallLength);
      }
    }
  }

  /// Compute sum of splitting scores for cover calls of the input function.
  /// Increment Score.CoverCallScore in place by the sum.
  void computeCoverCallScore(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex,
                             const std::vector<CallInfo> &CoverCalls,
                             SplitScore &Score) {
    if (opts::CallScale == 0)
      return;

    for (const CallInfo CI : CoverCalls) {
      assert(CI.Length >= Score.HotSizeReduction &&
             "Length of cover calls must exceed reduced size of hot fragment.");
      // Compute the new length of the call, which is shorter than the original
      // one by the size of the split fragment minus the total size increase.
      const size_t NewCallLength = CI.Length - Score.HotSizeReduction;
      Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength);
    }
  }

  /// Compute the split score of splitting a function at a given index.
  /// The split score consists of local score and cover score. This function
  /// returns \p Score of SplitScore type. It contains the local score and
  /// cover score of the current splitting index. For easier bookkeeping and
  /// comparison, it also stores the split index and the resulting reduction
  /// in hot fragment size.
  SplitScore computeSplitScore(const BinaryFunction &BF,
                               const BasicBlockOrder &BlockOrder,
                               const size_t SplitIndex,
                               const std::vector<CallInfo> &CoverCalls) {
    // Populate BinaryBasicBlock::OutputAddressRange with estimated
    // new start and end addresses after hot-warm splitting at SplitIndex.
    size_t OldHotEnd;
    size_t NewHotEnd;
    std::tie(OldHotEnd, NewHotEnd) =
        estimatePostSplitBBAddress(BlockOrder, SplitIndex);

    SplitScore Score;
    Score.SplitIndex = SplitIndex;

    // It's not worth splitting if OldHotEnd < NewHotEnd.
    if (OldHotEnd < NewHotEnd)
      return Score;

    // Hot fragment size reduction due to splitting.
    Score.HotSizeReduction = OldHotEnd - NewHotEnd;

    // First part of LocalScore is the sum over call edges originating in the
    // input function. These edges can get shorter or longer depending on
    // SplitIndex. Score.LocalScore is incremented in place.
    computeLocalCallScore(BlockOrder, SplitIndex, Score);

    // Second part of LocalScore is the sum over jump edges with src basic
    // block and dst basic block in the current function. Score.LocalScore is
    // incremented in place.
    computeJumpScore(BlockOrder, SplitIndex, Score);

    // Compute CoverCallScore and store in Score in place.
    computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
    return Score;
  }

  /// Find the most likely successor of a basic block when it has one or two
  /// successors. Return nullptr otherwise.
  const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
    if (BB->succ_size() == 1)
      return BB->getSuccessor();
    if (BB->succ_size() == 2) {
      uint64_t TakenCount = BB->getTakenBranchInfo().Count;
      assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
      uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
      assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
      if (TakenCount > NonTakenCount)
        return BB->getConditionalSuccessor(true);
      else if (TakenCount < NonTakenCount)
        return BB->getConditionalSuccessor(false);
    }
    return nullptr;
  }

  /// Find the best index for splitting. The returned value is the index of the
  /// last hot basic block. Hence, "no splitting" is equivalent to returning the
  /// value which is one less than the size of the function.
  size_t findSplitIndex(const BinaryFunction &BF,
                        const BasicBlockOrder &BlockOrder) {
    assert(BlockOrder.size() > 2);
    // Find all function calls that can be shortened if we move blocks of the
    // current function to warm/cold
    const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);

    // Find the existing hot-cold splitting index.
    size_t HotColdIndex = 0;
    while (HotColdIndex + 1 < BlockOrder.size()) {
      if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
        break;
      HotColdIndex++;
    }
    assert(HotColdIndex + 1 == BlockOrder.size() ||
           (BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
            BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
                FragmentNum::cold()));

    // Try all possible split indices up to HotColdIndex (blocks that have
    // Index <= SplitIndex are in hot) and find the one maximizing the
    // splitting score.
    SplitScore BestScore;
    for (size_t Index = 0; Index <= HotColdIndex; Index++) {
      const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
      assert(LastHotBB->getFragmentNum() != FragmentNum::cold());

      // Do not break jump to the most likely successor.
      if (Index + 1 < BlockOrder.size() &&
          BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
        continue;

      const SplitScore Score =
          computeSplitScore(BF, BlockOrder, Index, CoverCalls);
      if (Score.sum() > BestScore.sum())
        BestScore = Score;
    }

    // If we don't find a good splitting point, fall back to the original one.
    if (BestScore.SplitIndex == size_t(-1))
      return HotColdIndex;

    return BestScore.SplitIndex;
  }
};

struct SplitRandom2 final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // We want to split at least one block
    const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
    std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
    const DiffT SplitPoint = Dist(Gen);
    for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
      BB->setFragmentNum(FragmentNum::cold());

    LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
                                 "{1} possible) blocks to split\n",
                                 NumBlocks - SplitPoint, End - Start));
  }
};

struct SplitRandomN final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // With n blocks, there are n-1 places to split them.
    const DiffT MaximumSplits = NumBlocks - 1;
    // We want to generate at least two fragments if possible, but if there is
    // only one block, no splits are possible.
    const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
    std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
    // Choose how many splits to perform
    const DiffT NumSplits = Dist(Gen);

    // Draw split points from a lottery
    SmallVector<unsigned, 0> Lottery(MaximumSplits);
    // Start the lottery at 1, because there is no meaningful split point
    // before the first block.
    std::iota(Lottery.begin(), Lottery.end(), 1u);
    std::shuffle(Lottery.begin(), Lottery.end(), Gen);
    Lottery.resize(NumSplits);
    llvm::sort(Lottery);

    // Add one past the end entry to lottery
    Lottery.push_back(NumBlocks);

    unsigned LotteryIndex = 0;
    unsigned BBPos = 0;
    for (BinaryBasicBlock *const BB : make_range(Start, End)) {
      // Check whether to start new fragment
      if (BBPos >= Lottery[LotteryIndex])
        ++LotteryIndex;

      // Because LotteryIndex is 0 based and cold fragments are 1 based, we can
      // use the index to assign fragments.
      BB->setFragmentNum(FragmentNum(LotteryIndex));

      ++BBPos;
    }
  }
};

struct SplitAll final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override {
    // Keeping empty fragments allows us to test that empty fragments do not
    // generate symbols.
    return false;
  }

  void fragment(const BlockIt Start, const BlockIt End) override {
    unsigned Fragment = 0;
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
      BB->setFragmentNum(FragmentNum(Fragment++));
  }
};
} // namespace

namespace llvm {
namespace bolt {

bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
  // Apply execution count threshold
  if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
    return false;

  return BinaryFunctionPass::shouldOptimize(BF);
}

Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
  if (!opts::SplitFunctions)
    return Error::success();

  if (BC.IsLinuxKernel && BC.BOLTReserved.empty()) {
    BC.errs() << "BOLT-ERROR: split functions require reserved space in the "
                 "Linux kernel binary\n";
    exit(1);
  }

  // If split strategy is not CDSplit, then a second run of the pass is not
  // needed after function reordering.
  if (BC.HasFinalizedFunctionOrder &&
      opts::SplitStrategy != SplitFunctionsStrategy::CDSplit)
    return Error::success();

  std::unique_ptr<SplitStrategy> Strategy;
  bool ForceSequential = false;

  switch (opts::SplitStrategy) {
  case SplitFunctionsStrategy::CDSplit:
    // CDSplit runs two splitting passes: hot-cold splitting (SplitProfile2)
    // before function reordering and hot-warm-cold splitting
    // (SplitCacheDirected) after function reordering.
    if (BC.HasFinalizedFunctionOrder)
      Strategy = std::make_unique<SplitCacheDirected>(BC);
    else
      Strategy = std::make_unique<SplitProfile2>();
    opts::AggressiveSplitting = true;
    BC.HasWarmSection = true;
    break;
  case SplitFunctionsStrategy::Profile2:
    Strategy = std::make_unique<SplitProfile2>();
    break;
  case SplitFunctionsStrategy::Random2:
    Strategy = std::make_unique<SplitRandom2>();
    // If we split functions randomly, we need to ensure that across runs with
    // the same input, we generate random numbers for each function in the same
    // order.
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::RandomN:
    Strategy = std::make_unique<SplitRandomN>();
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::All:
    Strategy = std::make_unique<SplitAll>();
    break;
  }

  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
      [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
      "SplitFunctions", ForceSequential);

  if (SplitBytesHot + SplitBytesCold > 0)
    BC.outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
              << " hot bytes from " << SplitBytesCold << " cold bytes "
              << format("(%.2lf%% of split functions is hot).\n",
                        100.0 * SplitBytesHot /
                            (SplitBytesHot + SplitBytesCold));
  return Error::success();
}

void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
  if (BF.empty())
    return;

  if (!S.canSplit(BF))
    return;

  FunctionLayout &Layout = BF.getLayout();
  BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
                                                     Layout.block_end());

  BinaryContext &BC = BF.getBinaryContext();
  size_t OriginalHotSize;
  size_t HotSize;
  size_t ColdSize;
  if (BC.isX86()) {
    std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " pre-split is <0x"
                      << Twine::utohexstr(OriginalHotSize) << ", 0x"
                      << Twine::utohexstr(ColdSize) << ">\n");
  }

  BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
                                                Layout.block_end());
  // Never outline the first basic block.
  NewLayout.front()->setCanOutline(false);
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      continue;

    // Do not split extra entry points in aarch64. They can be referenced by
    // ADR instructions, and when that happens these blocks cannot be placed
    // far away due to the limited range of the ADR instruction.
    if (BC.isAArch64() && BB->isEntryPoint()) {
      BB->setCanOutline(false);
      continue;
    }

    if (BF.hasEHRanges() && !opts::SplitEH) {
      // We cannot move landing pads (or rather entry points for landing pads).
      if (BB->isLandingPad()) {
        BB->setCanOutline(false);
        continue;
      }
      // We cannot move a block that can throw since exception-handling
      // runtime cannot deal with split functions. However, if we can guarantee
      // that the block never throws, it is safe to move the block to
      // decrease the size of the function.
      for (MCInst &Instr : *BB) {
        if (BC.MIB->isInvoke(Instr)) {
          BB->setCanOutline(false);
          break;
        }
      }
    }

    // Outlining blocks with dynamic branches is not supported yet.
    if (BC.IsLinuxKernel) {
      if (llvm::any_of(
              *BB, [&](MCInst &Inst) { return BC.MIB->isDynamicBranch(Inst); }))
        BB->setCanOutline(false);
    }
  }

  BF.getLayout().updateLayoutIndices();
  S.fragment(NewLayout.begin(), NewLayout.end());

  // Make sure all non-outlineable blocks are in the main-fragment.
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      BB->setFragmentNum(FragmentNum::main());
  }

  if (opts::AggressiveSplitting) {
    // All blocks with 0 count that we can move go to the end of the function,
    // even if they were natural to cluster formation and were seen in-between
    // hot basic blocks.
    llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
                                     const BinaryBasicBlock *const B) {
      return A->getFragmentNum() < B->getFragmentNum();
    });
  } else if (BF.hasEHRanges() && !opts::SplitEH) {
    // Typically, functions with exception handling have landing pads at the
    // end. We cannot move the beginning of a landing pad, but we can move
    // 0-count blocks comprising landing pads to the end and thus facilitate
    // splitting.
    auto FirstLP = NewLayout.begin();
    while ((*FirstLP)->isLandingPad())
      ++FirstLP;

    std::stable_sort(FirstLP, NewLayout.end(),
                     [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
                       return A->getFragmentNum() < B->getFragmentNum();
                     });
  }

  // Make sure that fragments are increasing.
  FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
  for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
    if (BB->getFragmentNum() > CurrentFragment)
      BB->setFragmentNum(CurrentFragment);
    CurrentFragment = BB->getFragmentNum();
  }

  if (S.compactFragments()) {
    FragmentNum CurrentFragment = FragmentNum::main();
    FragmentNum NewFragment = FragmentNum::main();
    for (BinaryBasicBlock *const BB : NewLayout) {
      if (BB->getFragmentNum() > CurrentFragment) {
        CurrentFragment = BB->getFragmentNum();
        NewFragment = FragmentNum(NewFragment.get() + 1);
      }
      BB->setFragmentNum(NewFragment);
    }
  }

  const bool LayoutUpdated = BF.getLayout().update(NewLayout);

  // For shared objects, invoke instructions and corresponding landing pads
  // have to be placed in the same fragment. When we split them, create
  // trampoline landing pads that will redirect the execution to real LPs.
  TrampolineSetType Trampolines;
  if (BF.hasEHRanges() && BF.isSplit()) {
    // If all landing pads for this fragment are grouped in one (potentially
    // different) fragment, we can set LPStart to the start of that fragment
    // and avoid trampoline code.
    bool NeedsTrampolines = false;
    for (FunctionFragment &FF : BF.getLayout().fragments()) {
      // Vector of fragments that contain landing pads for this fragment.
      SmallVector<FragmentNum, 4> LandingPadFragments;
      for (const BinaryBasicBlock *BB : FF)
        for (const BinaryBasicBlock *LPB : BB->landing_pads())
          LandingPadFragments.push_back(LPB->getFragmentNum());

      // Eliminate duplicate entries from the vector.
      llvm::sort(LandingPadFragments);
      auto Last = llvm::unique(LandingPadFragments);
      LandingPadFragments.erase(Last, LandingPadFragments.end());

      if (LandingPadFragments.size() == 0) {
        // If the fragment has no landing pads, it can safely serve as its own
        // landing pad fragment.
        BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
      } else if (LandingPadFragments.size() == 1) {
        BF.setLPFragment(FF.getFragmentNum(), LandingPadFragments.front());
      } else {
        if (!BC.HasFixedLoadAddress) {
          NeedsTrampolines = true;
          break;
        } else {
          BF.setLPFragment(FF.getFragmentNum(), std::nullopt);
        }
      }
    }

    // Trampolines guarantee that all landing pads for any given fragment will
    // be contained in the same fragment.
    if (NeedsTrampolines) {
      for (FunctionFragment &FF : BF.getLayout().fragments())
        BF.setLPFragment(FF.getFragmentNum(), FF.getFragmentNum());
      Trampolines = createEHTrampolines(BF);
    }
  }

  // Check the new size to see if it's worth splitting the function.
  if (BC.isX86() && LayoutUpdated) {
    std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " post-split is <0x" << Twine::utohexstr(HotSize)
                      << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
    if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
        alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
      if (opts::Verbosity >= 2) {
        BC.outs() << "BOLT-INFO: Reversing splitting of function "
                  << formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
                             ColdSize, OriginalHotSize);
      }

      // Reverse the action of createEHTrampolines(). The trampolines will be
      // placed immediately before the matching destination resulting in no
      // extra code.
      if (PreSplitLayout.size() != BF.size())
        PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);

      for (BinaryBasicBlock &BB : BF)
        BB.setFragmentNum(FragmentNum::main());
      BF.getLayout().update(PreSplitLayout);
    } else {
      SplitBytesHot += HotSize;
      SplitBytesCold += ColdSize;
    }
  }

  // Restore LP fragment for the main fragment if the splitting was undone.
  if (BF.hasEHRanges() && !BF.isSplit())
    BF.setLPFragment(FragmentNum::main(), FragmentNum::main());

  // Fix branches if the splitting decision of the pass after function
  // reordering is different from that of the pass before function reordering.
  if (LayoutUpdated && BC.HasFinalizedFunctionOrder)
    BF.fixBranches();
}

SplitFunctions::TrampolineSetType
SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Map real landing pads to the corresponding trampolines.
  TrampolineSetType LPTrampolines;

  // Iterate over a copy of the basic blocks since we are adding new blocks to
  // the function, which will invalidate its iterators.
  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *BB : Blocks) {
    for (MCInst &Instr : *BB) {
      const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
      if (!EHInfo || !EHInfo->first)
        continue;

      const MCSymbol *LPLabel = EHInfo->first;
      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
      if (BB->getFragmentNum() == LPBlock->getFragmentNum())
        continue;

      const MCSymbol *TrampolineLabel = nullptr;
      const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
      auto Iter = LPTrampolines.find(Key);
      if (Iter != LPTrampolines.end()) {
        TrampolineLabel = Iter->second;
      } else {
        // Create a trampoline basic block in the same fragment as the thrower.
        // Note: there's no need to insert the jump instruction, it will be
        // added by fixBranches().
        BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
        TrampolineBB->setFragmentNum(BB->getFragmentNum());
        TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
        TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
        TrampolineBB->setCFIState(LPBlock->getCFIState());
        TrampolineLabel = TrampolineBB->getLabel();
        LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
      }

      // Substitute the landing pad with the trampoline.
      MIB->updateEHInfo(Instr,
                        MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
    }
  }

  if (LPTrampolines.empty())
    return LPTrampolines;

  // All trampoline blocks were added to the end of the function. Place them at
  // the end of corresponding fragments.
  BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
                                                BF.getLayout().block_end());
  stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
    return A->getFragmentNum() < B->getFragmentNum();
  });
  BF.getLayout().update(NewLayout);

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update exception-handling CFG for the function.
  BF.recomputeLandingPads();

  return LPTrampolines;
}

SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
    BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
    const SplitFunctions::TrampolineSetType &Trampolines) const {
  DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
      IncomingTrampolines;
  for (const auto &Entry : Trampolines) {
    IncomingTrampolines[Entry.getFirst().Target].emplace_back(
        Entry.getSecond());
  }

  BasicBlockOrderType MergedLayout;
  for (BinaryBasicBlock *BB : Layout) {
    auto Iter = IncomingTrampolines.find(BB->getLabel());
    if (Iter != IncomingTrampolines.end()) {
      for (const MCSymbol *const Trampoline : Iter->getSecond()) {
        BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
        assert(LPBlock && "Could not find matching landing pad block.");
        MergedLayout.push_back(LPBlock);
      }
    }
    MergedLayout.push_back(BB);
  }

  return MergedLayout;
}

} // namespace bolt
} // namespace llvm