//===- bolt/Passes/SplitFunctions.cpp - Pass for splitting function code --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SplitFunctions pass.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/SplitFunctions.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/FunctionLayout.h"
#include "bolt/Core/ParallelUtilities.h"
#include "bolt/Utils/CommandLineOpts.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormatVariadic.h"
#include <algorithm>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#define DEBUG_TYPE "bolt-opts"

using namespace llvm;
using namespace bolt;

namespace {
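// Backward-compatibility shim for -split-functions: the old numeric values
// "2" and "3" are still accepted but are treated as plain "true" and emit a
// deprecation warning.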
class DeprecatedSplitFunctionOptionParser : public cl::parser<bool> {
public:
  explicit DeprecatedSplitFunctionOptionParser(cl::Option &O)
      : cl::parser<bool>(O) {}

  bool parse(cl::Option &O, StringRef ArgName, StringRef Arg, bool &Value) {
    if (Arg == "2" || Arg == "3") {
      Value = true;
      errs() << formatv("BOLT-WARNING: specifying non-boolean value \"{0}\" "
                        "for option -{1} is deprecated\n",
                        Arg, ArgName);
      return false;
    }
    return cl::parser<bool>::parse(O, ArgName, Arg, Value);
  }
};
} // namespace

namespace opts {

extern cl::OptionCategory BoltOptCategory;

extern cl::opt<bool> SplitEH;
extern cl::opt<unsigned> ExecutionCountThreshold;
extern cl::opt<uint32_t> RandomSeed;

static cl::opt<bool> AggressiveSplitting(
    "split-all-cold", cl::desc("outline as many cold basic blocks as possible"),
    cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitAlignThreshold(
    "split-align-threshold",
    cl::desc("when deciding to split a function, apply this alignment "
             "while doing the size comparison (see -split-threshold). "
             "Default value: 2."),
    cl::init(2),

    cl::Hidden, cl::cat(BoltOptCategory));

static cl::opt<bool, false, DeprecatedSplitFunctionOptionParser>
    SplitFunctions("split-functions",
                   cl::desc("split functions into fragments"),
                   cl::cat(BoltOptCategory));

static cl::opt<unsigned> SplitThreshold(
    "split-threshold",
    cl::desc("split function only if its main size is reduced by more than "
             "given amount of bytes. Default value: 0, i.e. split iff the "
             "size is reduced. Note that on some architectures the size can "
             "increase after splitting."),
    cl::init(0), cl::Hidden, cl::cat(BoltOptCategory));
static cl::opt<SplitFunctionsStrategy> SplitStrategy(
    "split-strategy", cl::init(SplitFunctionsStrategy::Profile2),
    cl::values(clEnumValN(SplitFunctionsStrategy::Profile2, "profile2",
                          "split each function into a hot and cold fragment "
                          "using profiling information")),
    cl::values(clEnumValN(SplitFunctionsStrategy::CDSplit, "cdsplit",
                          "split each function into a hot, warm, and cold "
                          "fragment using profiling information")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::Random2, "random2",
        "split each function into a hot and cold fragment at a randomly chosen "
        "split point (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::RandomN, "randomN",
        "split each function into N fragments at randomly chosen split "
        "points (ignoring any available profiling information)")),
    cl::values(clEnumValN(
        SplitFunctionsStrategy::All, "all",
        "split all basic blocks of each function into fragments such that each "
        "fragment contains exactly a single basic block")),
    cl::desc("strategy used to partition blocks into fragments"),
    cl::cat(BoltOptCategory));

static cl::opt<double> CallScale(
    "call-scale",
    cl::desc("Call score scale coefficient (when --split-strategy=cdsplit)"),
    cl::init(0.95), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<double>
    CallPower("call-power",
              cl::desc("Call score power (when --split-strategy=cdsplit)"),
              cl::init(0.05), cl::ReallyHidden, cl::cat(BoltOptCategory));

static cl::opt<double>
    JumpPower("jump-power",
              cl::desc("Jump score power (when --split-strategy=cdsplit)"),
              cl::init(0.15), cl::ReallyHidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace {
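// Return true when every block in the function has a recorded execution
// count, i.e. the profile fully covers the function.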
bool hasFullProfile(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE;
  });
}

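// Return true when every block in the function has a zero execution count.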
bool allBlocksCold(const BinaryFunction &BF) {
  return llvm::all_of(BF.blocks(), [](const BinaryBasicBlock &BB) {
    return BB.getExecutionCount() == 0;
  });
}

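// Profile-guided hot/cold splitting: every block with a zero execution count
// is moved to the cold fragment; all other blocks stay in the main fragment.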
struct SplitProfile2 final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End)) {
      if (BB->getExecutionCount() == 0)
        BB->setFragmentNum(FragmentNum::cold());
    }
  }
};

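// Cache-directed splitting (CDSplit): choose a hot/warm split point that
// maximizes an estimated locality score based on call and jump distances,
// computed over an approximate call graph and estimated post-split block
// addresses. Blocks with a zero execution count remain cold.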
struct SplitCacheDirected final : public SplitStrategy {
  BinaryContext &BC;
  using BasicBlockOrder = BinaryFunction::BasicBlockOrderType;

  bool canSplit(const BinaryFunction &BF) override {
    return BF.hasValidProfile() && hasFullProfile(BF) && !allBlocksCold(BF);
  }

  explicit SplitCacheDirected(BinaryContext &BC) : BC(BC) {
    initializeAuxiliaryVariables();
    buildCallGraph();
  }

  // When some functions are hot-warm split and others are hot-warm-cold split,
  // we do not want to change the fragment numbers of the blocks in the
  // hot-warm split functions.
  bool compactFragments() override { return false; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    BasicBlockOrder BlockOrder(Start, End);
    BinaryFunction &BF = *BlockOrder.front()->getFunction();
    // No need to re-split small functions.
    if (BlockOrder.size() <= 2)
      return;

    size_t BestSplitIndex = findSplitIndex(BF, BlockOrder);
    assert(BestSplitIndex < BlockOrder.size());

    // Assign fragments based on the computed best split index.
    // All basic blocks with index up to the best split index become hot.
    // All remaining blocks become warm if their execution count is greater
    // than zero and cold otherwise.
    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
      BinaryBasicBlock *BB = BlockOrder[Index];
      if (Index <= BestSplitIndex)
        BB->setFragmentNum(FragmentNum::main());
      else
        BB->setFragmentNum(BB->getKnownExecutionCount() > 0
                               ? FragmentNum::warm()
                               : FragmentNum::cold());
    }
  }

private:
  struct CallInfo {
    size_t Length;
    size_t Count;
  };

  struct SplitScore {
    size_t SplitIndex = size_t(-1);
    size_t HotSizeReduction = 0;
    double LocalScore = 0;
    double CoverCallScore = 0;

    double sum() const { return LocalScore + CoverCallScore; }
  };

  // Auxiliary variables used by the algorithm.
  size_t TotalNumBlocks{0};
  size_t OrigHotSectionSize{0};
  DenseMap<const BinaryBasicBlock *, size_t> GlobalIndices;
  DenseMap<const BinaryBasicBlock *, size_t> BBSizes;
  DenseMap<const BinaryBasicBlock *, size_t> BBOffsets;

  // Call graph.
  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callers;
  std::vector<SmallVector<const BinaryBasicBlock *, 0>> Callees;

  bool shouldConsiderForCallGraph(const BinaryFunction &BF) {
    // Only a subset of the functions in the binary will be considered
    // for initializing auxiliary variables and building the call graph.
    return BF.hasValidIndex() && BF.hasValidProfile() && !BF.empty();
  }

  void initializeAuxiliaryVariables() {
    for (BinaryFunction *BF : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*BF))
        continue;

      // Calculate the size of each BB after hot-cold splitting.
      // This populates BinaryBasicBlock::OutputAddressRange which
      // can be used to compute the size of each BB.
      BC.calculateEmittedSize(*BF, /*FixBranches=*/true);

      for (BinaryBasicBlock *BB : BF->getLayout().blocks()) {
        // Unique global index.
        GlobalIndices[BB] = TotalNumBlocks;
        TotalNumBlocks++;

        // Block size after hot-cold splitting.
        BBSizes[BB] = BB->getOutputSize();

        // Hot block offset after hot-cold splitting.
        BBOffsets[BB] = OrigHotSectionSize;
        if (!BB->isSplit())
          OrigHotSectionSize += BBSizes[BB];
      }
    }
  }

  void buildCallGraph() {
    Callers.resize(TotalNumBlocks);
    Callees.resize(TotalNumBlocks);
    for (const BinaryFunction *SrcFunction : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*SrcFunction))
        continue;

      for (BinaryBasicBlock &SrcBB : SrcFunction->blocks()) {
        // Skip blocks that are not executed.
        if (SrcBB.getKnownExecutionCount() == 0)
          continue;

        // Find call instructions and extract target symbols from each one.
        for (const MCInst &Inst : SrcBB) {
          if (!BC.MIB->isCall(Inst))
            continue;

          // Call info.
          const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
          // Ignore calls w/o information.
          if (!DstSym)
            continue;

          const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym);
          // Ignore calls that do not have a valid target, but do not ignore
          // recursive calls, because the caller block could be moved to warm.
          if (!DstFunction || DstFunction->getLayout().block_empty())
            continue;

          const BinaryBasicBlock *DstBB = &(DstFunction->front());

          // Record the call only if DstBB is also in the functions considered
          // for the call graph.
          if (GlobalIndices.contains(DstBB)) {
            Callers[GlobalIndices[DstBB]].push_back(&SrcBB);
            Callees[GlobalIndices[&SrcBB]].push_back(DstBB);
          }
        }
      }
    }
  }

  /// Populate BinaryBasicBlock::OutputAddressRange with estimated basic block
  /// start and end addresses for hot and warm basic blocks, assuming hot-warm
  /// splitting happens at \p SplitIndex. Also return estimated end addresses
  /// of the hot fragment before and after splitting.
  /// The estimations take into account the potential addition of branch
  /// instructions due to split fall through branches as well as the need to
  /// use longer branch instructions for split (un)conditional branches.
  std::pair<size_t, size_t>
  estimatePostSplitBBAddress(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex) {
    assert(SplitIndex < BlockOrder.size() && "Invalid split index");

    // Update function layout assuming hot-warm splitting at SplitIndex.
    for (size_t Index = 0; Index < BlockOrder.size(); Index++) {
      BinaryBasicBlock *BB = BlockOrder[Index];
      if (BB->getFragmentNum() == FragmentNum::cold())
        break;
      BB->setFragmentNum(Index <= SplitIndex ? FragmentNum::main()
                                             : FragmentNum::warm());
    }
    BinaryFunction *BF = BlockOrder[0]->getFunction();
    BF->getLayout().update(BlockOrder);
    // Populate BB.OutputAddressRange under the updated layout.
    BC.calculateEmittedSize(*BF);

    // Populate BB.OutputAddressRange with estimated new start and end
    // addresses, and compute the old and new end addresses of the hot section.
    size_t OldHotEndAddr{0};
    size_t NewHotEndAddr{0};
    size_t CurrentAddr = BBOffsets[BlockOrder[0]];
    for (BinaryBasicBlock *BB : BlockOrder) {
      // We only care about new addresses of blocks in hot/warm.
      if (BB->getFragmentNum() == FragmentNum::cold())
        break;
      const size_t NewSize = BB->getOutputSize();
      BB->setOutputStartAddress(CurrentAddr);
      CurrentAddr += NewSize;
      BB->setOutputEndAddress(CurrentAddr);
      if (BB->getLayoutIndex() == SplitIndex) {
        NewHotEndAddr = CurrentAddr;
        // Approximate the start address of the warm fragment of the current
        // function using the original hot section size.
        CurrentAddr = OrigHotSectionSize;
      }
      OldHotEndAddr = BBOffsets[BB] + BBSizes[BB];
    }
    return std::make_pair(OldHotEndAddr, NewHotEndAddr);
  }

  /// Get a collection of "shortenable" calls, that is, calls of type X->Y
  /// when the function order is [... X ... BF ... Y ...].
  /// If the hot fragment size of BF is reduced, then such calls are guaranteed
  /// to get shorter by the reduced hot fragment size.
  std::vector<CallInfo> extractCoverCalls(const BinaryFunction &BF) {
    // Record the length and the count of the calls that can be shortened.
    std::vector<CallInfo> CoverCalls;
    if (opts::CallScale == 0)
      return CoverCalls;

    const BinaryFunction *ThisBF = &BF;
    const BinaryBasicBlock *ThisBB = &(ThisBF->front());
    const size_t ThisGI = GlobalIndices[ThisBB];

    for (const BinaryFunction *DstBF : BC.getSortedFunctions()) {
      if (!shouldConsiderForCallGraph(*DstBF))
        continue;

      const BinaryBasicBlock *DstBB = &(DstBF->front());
      if (DstBB->getKnownExecutionCount() == 0)
        continue;

      const size_t DstGI = GlobalIndices[DstBB];
      for (const BinaryBasicBlock *SrcBB : Callers[DstGI]) {
        const BinaryFunction *SrcBF = SrcBB->getFunction();
        if (ThisBF == SrcBF)
          continue;

        const size_t CallCount = SrcBB->getKnownExecutionCount();

        const size_t SrcGI = GlobalIndices[SrcBB];

        const bool IsCoverCall = (SrcGI < ThisGI && ThisGI < DstGI) ||
                                 (DstGI <= ThisGI && ThisGI < SrcGI);
        if (!IsCoverCall)
          continue;

        const size_t SrcBBEndAddr = BBOffsets[SrcBB] + BBSizes[SrcBB];
        const size_t DstBBStartAddr = BBOffsets[DstBB];
        const size_t CallLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        const CallInfo CI{CallLength, CallCount};
        CoverCalls.emplace_back(CI);
      }
    }
    return CoverCalls;
  }

  /// Compute the edge score of a call edge.
  double computeCallScore(uint64_t CallCount, size_t CallLength) {
    // Increase call lengths by 1 to avoid raising 0 to a negative power.
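    // i.e. Score = CallScale * CallCount / (CallLength + 1)^CallPower.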
    return opts::CallScale * static_cast<double>(CallCount) /
           std::pow(static_cast<double>(CallLength + 1), opts::CallPower);
  }

  /// Compute the edge score of a jump (branch) edge.
  double computeJumpScore(uint64_t JumpCount, size_t JumpLength) {
    // Increase jump lengths by 1 to avoid raising 0 to a negative power.
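    // i.e. Score = JumpCount / (JumpLength + 1)^JumpPower.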
    return static_cast<double>(JumpCount) /
           std::pow(static_cast<double>(JumpLength + 1), opts::JumpPower);
  }

  /// Compute the sum of scores over jumps within \p BlockOrder given
  /// \p SplitIndex. Increment Score.LocalScore in place by the sum.
  void computeJumpScore(const BasicBlockOrder &BlockOrder,
                        const size_t SplitIndex, SplitScore &Score) {

    for (const BinaryBasicBlock *SrcBB : BlockOrder) {
      if (SrcBB->getKnownExecutionCount() == 0)
        continue;

      const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second;

      for (const auto Pair : zip(SrcBB->successors(), SrcBB->branch_info())) {
        const BinaryBasicBlock *DstBB = std::get<0>(Pair);
        const BinaryBasicBlock::BinaryBranchInfo &Branch = std::get<1>(Pair);
        const size_t JumpCount = Branch.Count;

        if (JumpCount == 0)
          continue;

        const size_t DstBBStartAddr = DstBB->getOutputAddressRange().first;
        const size_t NewJumpLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        Score.LocalScore += computeJumpScore(JumpCount, NewJumpLength);
      }
    }
  }

  /// Compute the sum of scores over calls originating in the current function
  /// given \p SplitIndex. Increment Score.LocalScore in place by the sum.
  void computeLocalCallScore(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex, SplitScore &Score) {
    if (opts::CallScale == 0)
      return;

    // Global index of the last block in the current function.
    // This is later used to determine whether a call originating in the
    // current function targets a function that comes after the current
    // function.
    const size_t LastGlobalIndex = GlobalIndices[BlockOrder.back()];

    // The length of calls originating in the input function can increase /
    // decrease depending on the splitting decision.
    for (const BinaryBasicBlock *SrcBB : BlockOrder) {
      const size_t CallCount = SrcBB->getKnownExecutionCount();
      // If SrcBB does not call any functions, skip it.
      if (CallCount == 0)
        continue;

      // Obtain an estimate of the end address of the src basic block
      // after splitting at SplitIndex.
      const size_t SrcBBEndAddr = SrcBB->getOutputAddressRange().second;

      for (const BinaryBasicBlock *DstBB : Callees[GlobalIndices[SrcBB]]) {
        // Obtain an estimate of the start address of the dst basic block
        // after splitting at SplitIndex. If DstBB is in a function before
        // the current function, then its start address remains unchanged.
        size_t DstBBStartAddr = BBOffsets[DstBB];
        // If DstBB is in a function after the current function, then its
        // start address should be adjusted based on the reduction in hot size.
        if (GlobalIndices[DstBB] > LastGlobalIndex) {
          assert(DstBBStartAddr >= Score.HotSizeReduction);
          DstBBStartAddr -= Score.HotSizeReduction;
        }
        const size_t NewCallLength =
            AbsoluteDifference(SrcBBEndAddr, DstBBStartAddr);
        Score.LocalScore += computeCallScore(CallCount, NewCallLength);
      }
    }
  }

  /// Compute the sum of splitting scores for cover calls of the input
  /// function. Increment Score.CoverCallScore in place by the sum.
  void computeCoverCallScore(const BasicBlockOrder &BlockOrder,
                             const size_t SplitIndex,
                             const std::vector<CallInfo> &CoverCalls,
                             SplitScore &Score) {
    if (opts::CallScale == 0)
      return;

    for (const CallInfo CI : CoverCalls) {
      assert(CI.Length >= Score.HotSizeReduction &&
             "Length of cover calls must exceed reduced size of hot fragment.");
      // Compute the new length of the call, which is shorter than the original
      // one by the size of the split fragment minus the total size increase.
      const size_t NewCallLength = CI.Length - Score.HotSizeReduction;
      Score.CoverCallScore += computeCallScore(CI.Count, NewCallLength);
    }
  }

  /// Compute the split score of splitting a function at a given index.
  /// The split score consists of local score and cover score. This function
  /// returns \p Score of SplitScore type. It contains the local score and
  /// cover score of the current splitting index. For easier bookkeeping and
  /// comparison, it also stores the split index and the resulting reduction
  /// in hot fragment size.
  SplitScore computeSplitScore(const BinaryFunction &BF,
                               const BasicBlockOrder &BlockOrder,
                               const size_t SplitIndex,
                               const std::vector<CallInfo> &CoverCalls) {
    // Populate BinaryBasicBlock::OutputAddressRange with estimated
    // new start and end addresses after hot-warm splitting at SplitIndex.
    size_t OldHotEnd;
    size_t NewHotEnd;
    std::tie(OldHotEnd, NewHotEnd) =
        estimatePostSplitBBAddress(BlockOrder, SplitIndex);

    SplitScore Score;
    Score.SplitIndex = SplitIndex;

    // It's not worth splitting if OldHotEnd < NewHotEnd.
    if (OldHotEnd < NewHotEnd)
      return Score;

    // Hot fragment size reduction due to splitting.
    Score.HotSizeReduction = OldHotEnd - NewHotEnd;

    // The first part of LocalScore is the sum over call edges originating in
    // the input function. These edges can get shorter or longer depending on
    // SplitIndex. Score.LocalScore is incremented in place.
    computeLocalCallScore(BlockOrder, SplitIndex, Score);

    // The second part of LocalScore is the sum over jump edges with both src
    // and dst basic blocks in the current function. Score.LocalScore is
    // incremented in place.
    computeJumpScore(BlockOrder, SplitIndex, Score);

    // Compute CoverCallScore and store it in Score in place.
    computeCoverCallScore(BlockOrder, SplitIndex, CoverCalls, Score);
    return Score;
  }

  /// Find the most likely successor of a basic block when it has one or two
  /// successors. Return nullptr otherwise.
  const BinaryBasicBlock *getMostLikelySuccessor(const BinaryBasicBlock *BB) {
    if (BB->succ_size() == 1)
      return BB->getSuccessor();
    if (BB->succ_size() == 2) {
      uint64_t TakenCount = BB->getTakenBranchInfo().Count;
      assert(TakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
      uint64_t NonTakenCount = BB->getFallthroughBranchInfo().Count;
      assert(NonTakenCount != BinaryBasicBlock::COUNT_NO_PROFILE);
      if (TakenCount > NonTakenCount)
        return BB->getConditionalSuccessor(true);
      else if (TakenCount < NonTakenCount)
        return BB->getConditionalSuccessor(false);
    }
    return nullptr;
  }

  /// Find the best index for splitting. The returned value is the index of the
  /// last hot basic block. Hence, "no splitting" is equivalent to returning the
  /// value which is one less than the size of the function.
  size_t findSplitIndex(const BinaryFunction &BF,
                        const BasicBlockOrder &BlockOrder) {
    assert(BlockOrder.size() > 2);
    // Find all function calls that can be shortened if we move blocks of the
    // current function to warm/cold.
    const std::vector<CallInfo> CoverCalls = extractCoverCalls(BF);

    // Find the existing hot-cold splitting index.
    size_t HotColdIndex = 0;
    while (HotColdIndex + 1 < BlockOrder.size()) {
      if (BlockOrder[HotColdIndex + 1]->getFragmentNum() == FragmentNum::cold())
        break;
      HotColdIndex++;
    }
    assert(HotColdIndex + 1 == BlockOrder.size() ||
           (BlockOrder[HotColdIndex]->getFragmentNum() == FragmentNum::main() &&
            BlockOrder[HotColdIndex + 1]->getFragmentNum() ==
                FragmentNum::cold()));

    // Try all possible split indices up to HotColdIndex (blocks that have
    // Index <= SplitIndex are in hot) and find the one maximizing the
    // splitting score.
    SplitScore BestScore;
    for (size_t Index = 0; Index <= HotColdIndex; Index++) {
      const BinaryBasicBlock *LastHotBB = BlockOrder[Index];
      assert(LastHotBB->getFragmentNum() != FragmentNum::cold());

      // Do not break a jump to the most likely successor.
      if (Index + 1 < BlockOrder.size() &&
          BlockOrder[Index + 1] == getMostLikelySuccessor(LastHotBB))
        continue;

      const SplitScore Score =
          computeSplitScore(BF, BlockOrder, Index, CoverCalls);
      if (Score.sum() > BestScore.sum())
        BestScore = Score;
    }

    // If we don't find a good splitting point, fall back to the original one.
    if (BestScore.SplitIndex == size_t(-1))
      return HotColdIndex;

    return BestScore.SplitIndex;
  }
};

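// Random hot/cold splitting, useful for testing: draw a single split point
// uniformly at random and move every block after it to the cold fragment.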
struct SplitRandom2 final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandom2() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // We want to split at least one block.
    const auto LastSplitPoint = std::max<DiffT>(NumBlocks - 1, 1);
    std::uniform_int_distribution<DiffT> Dist(1, LastSplitPoint);
    const DiffT SplitPoint = Dist(Gen);
    for (BinaryBasicBlock *BB : llvm::make_range(Start + SplitPoint, End))
      BB->setFragmentNum(FragmentNum::cold());

    LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of "
                                 "{1} possible) blocks to split\n",
                                 NumBlocks - SplitPoint, End - Start));
  }
};

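// Random multi-way splitting, useful for testing: draw a random number of
// split points and place each resulting group of blocks in its own fragment.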
struct SplitRandomN final : public SplitStrategy {
  std::minstd_rand0 Gen;

  SplitRandomN() : Gen(opts::RandomSeed.getValue()) {}

  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override { return true; }

  void fragment(const BlockIt Start, const BlockIt End) override {
    using DiffT = typename std::iterator_traits<BlockIt>::difference_type;
    const DiffT NumBlocks = End - Start;
    assert(NumBlocks > 0 && "Cannot fragment empty function");

    // With n blocks, there are n-1 places to split them.
    const DiffT MaximumSplits = NumBlocks - 1;
    // We want to generate at least two fragments if possible, but if there is
    // only one block, no splits are possible.
    const auto MinimumSplits = std::min<DiffT>(MaximumSplits, 1);
    std::uniform_int_distribution<DiffT> Dist(MinimumSplits, MaximumSplits);
    // Choose how many splits to perform.
    const DiffT NumSplits = Dist(Gen);

    // Draw split points from a lottery.
    SmallVector<unsigned, 0> Lottery(MaximumSplits);
    // Start the lottery at 1, because there is no meaningful split point
    // before the first block.
    std::iota(Lottery.begin(), Lottery.end(), 1u);
    std::shuffle(Lottery.begin(), Lottery.end(), Gen);
    Lottery.resize(NumSplits);
    llvm::sort(Lottery);

    // Add a one-past-the-end entry to the lottery.
    Lottery.push_back(NumBlocks);

    unsigned LotteryIndex = 0;
    unsigned BBPos = 0;
    for (BinaryBasicBlock *const BB : make_range(Start, End)) {
      // Check whether to start a new fragment.
      if (BBPos >= Lottery[LotteryIndex])
        ++LotteryIndex;

      // Because LotteryIndex is 0-based and cold fragments are 1-based, we can
      // use the index to assign fragments.
      BB->setFragmentNum(FragmentNum(LotteryIndex));

      ++BBPos;
    }
  }
};

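// Stress-test strategy: place every basic block of the function in its own
// fragment.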
struct SplitAll final : public SplitStrategy {
  bool canSplit(const BinaryFunction &BF) override { return true; }

  bool compactFragments() override {
    // Keeping empty fragments allows us to test that empty fragments do not
    // generate symbols.
    return false;
  }

  void fragment(const BlockIt Start, const BlockIt End) override {
    unsigned Fragment = 0;
    for (BinaryBasicBlock *const BB : llvm::make_range(Start, End))
      BB->setFragmentNum(FragmentNum(Fragment++));
  }
};
} // namespace

namespace llvm {
namespace bolt {

bool SplitFunctions::shouldOptimize(const BinaryFunction &BF) const {
  // Apply execution count threshold.
  if (BF.getKnownExecutionCount() < opts::ExecutionCountThreshold)
    return false;

  return BinaryFunctionPass::shouldOptimize(BF);
}

Error SplitFunctions::runOnFunctions(BinaryContext &BC) {
  if (!opts::SplitFunctions)
    return Error::success();

  // If the split strategy is not CDSplit, then a second run of the pass is not
  // needed after function reordering.
  if (BC.HasFinalizedFunctionOrder &&
      opts::SplitStrategy != SplitFunctionsStrategy::CDSplit)
    return Error::success();

  std::unique_ptr<SplitStrategy> Strategy;
  bool ForceSequential = false;

  switch (opts::SplitStrategy) {
  case SplitFunctionsStrategy::CDSplit:
    // CDSplit runs two splitting passes: hot-cold splitting (SplitProfile2)
    // before function reordering and hot-warm-cold splitting
    // (SplitCacheDirected) after function reordering.
    if (BC.HasFinalizedFunctionOrder)
      Strategy = std::make_unique<SplitCacheDirected>(BC);
    else
      Strategy = std::make_unique<SplitProfile2>();
    opts::AggressiveSplitting = true;
    BC.HasWarmSection = true;
    break;
  case SplitFunctionsStrategy::Profile2:
    Strategy = std::make_unique<SplitProfile2>();
    break;
  case SplitFunctionsStrategy::Random2:
    Strategy = std::make_unique<SplitRandom2>();
    // If we split functions randomly, we need to ensure that across runs with
    // the same input, we generate random numbers for each function in the same
    // order.
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::RandomN:
    Strategy = std::make_unique<SplitRandomN>();
    ForceSequential = true;
    break;
  case SplitFunctionsStrategy::All:
    Strategy = std::make_unique<SplitAll>();
    break;
  }

  ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
    return !shouldOptimize(BF);
  };

  ParallelUtilities::runOnEachFunction(
      BC, ParallelUtilities::SchedulingPolicy::SP_BB_LINEAR,
      [&](BinaryFunction &BF) { splitFunction(BF, *Strategy); }, SkipFunc,
      "SplitFunctions", ForceSequential);

  if (SplitBytesHot + SplitBytesCold > 0)
    BC.outs() << "BOLT-INFO: splitting separates " << SplitBytesHot
              << " hot bytes from " << SplitBytesCold << " cold bytes "
              << format("(%.2lf%% of split functions is hot).\n",
                        100.0 * SplitBytesHot /
                            (SplitBytesHot + SplitBytesCold));
  return Error::success();
}

void SplitFunctions::splitFunction(BinaryFunction &BF, SplitStrategy &S) {
  if (BF.empty())
    return;

  if (!S.canSplit(BF))
    return;

  FunctionLayout &Layout = BF.getLayout();
  BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(),
                                                     Layout.block_end());

  BinaryContext &BC = BF.getBinaryContext();
  size_t OriginalHotSize;
  size_t HotSize;
  size_t ColdSize;
  if (BC.isX86()) {
    std::tie(OriginalHotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " pre-split is <0x"
                      << Twine::utohexstr(OriginalHotSize) << ", 0x"
                      << Twine::utohexstr(ColdSize) << ">\n");
  }

  BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(),
                                                Layout.block_end());
  // Never outline the first basic block.
  NewLayout.front()->setCanOutline(false);
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      continue;

    // Do not split extra entry points on AArch64. They can be referenced via
    // ADR instructions, and when that happens, these blocks cannot be placed
    // far away due to the limited range of the ADR instruction.
    if (BC.isAArch64() && BB->isEntryPoint()) {
      BB->setCanOutline(false);
      continue;
    }

    if (BF.hasEHRanges() && !opts::SplitEH) {
      // We cannot move landing pads (or rather entry points for landing pads).
      if (BB->isLandingPad()) {
        BB->setCanOutline(false);
        continue;
      }
      // We cannot move a block that can throw since the exception-handling
      // runtime cannot deal with split functions. However, if we can guarantee
      // that the block never throws, it is safe to move the block to
      // decrease the size of the function.
      for (MCInst &Instr : *BB) {
        if (BC.MIB->isInvoke(Instr)) {
          BB->setCanOutline(false);
          break;
        }
      }
    }
  }

  BF.getLayout().updateLayoutIndices();
  S.fragment(NewLayout.begin(), NewLayout.end());

  // Make sure all non-outlineable blocks are in the main fragment.
  for (BinaryBasicBlock *const BB : NewLayout) {
    if (!BB->canOutline())
      BB->setFragmentNum(FragmentNum::main());
  }

  if (opts::AggressiveSplitting) {
    // All blocks with a zero count that we can move go to the end of the
    // function, even if they were natural to cluster formation and appeared
    // in between hot basic blocks.
    llvm::stable_sort(NewLayout, [&](const BinaryBasicBlock *const A,
                                     const BinaryBasicBlock *const B) {
      return A->getFragmentNum() < B->getFragmentNum();
    });
  } else if (BF.hasEHRanges() && !opts::SplitEH) {
    // Typically, functions with exception handling have landing pads at the
    // end. We cannot move the beginnings of landing pads, but we can move
    // 0-count blocks comprising landing pads to the end and thus facilitate
    // splitting.
    auto FirstLP = NewLayout.begin();
    while ((*FirstLP)->isLandingPad())
      ++FirstLP;

    std::stable_sort(FirstLP, NewLayout.end(),
                     [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
                       return A->getFragmentNum() < B->getFragmentNum();
                     });
  }

  // Make sure that fragment numbers are non-decreasing along the layout.
  FragmentNum CurrentFragment = NewLayout.back()->getFragmentNum();
  for (BinaryBasicBlock *const BB : reverse(NewLayout)) {
    if (BB->getFragmentNum() > CurrentFragment)
      BB->setFragmentNum(CurrentFragment);
    CurrentFragment = BB->getFragmentNum();
  }

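  // Compact fragment numbers when the strategy requests it: map the distinct
  // fragment numbers that actually received blocks onto consecutive values
  // starting from the main fragment.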
  if (S.compactFragments()) {
    FragmentNum CurrentFragment = FragmentNum::main();
    FragmentNum NewFragment = FragmentNum::main();
    for (BinaryBasicBlock *const BB : NewLayout) {
      if (BB->getFragmentNum() > CurrentFragment) {
        CurrentFragment = BB->getFragmentNum();
        NewFragment = FragmentNum(NewFragment.get() + 1);
      }
      BB->setFragmentNum(NewFragment);
    }
  }

  const bool LayoutUpdated = BF.getLayout().update(NewLayout);

  // For shared objects, invoke instructions and corresponding landing pads
  // have to be placed in the same fragment. When we split them, create
  // trampoline landing pads that will redirect the execution to real LPs.
  TrampolineSetType Trampolines;
  if (!BC.HasFixedLoadAddress && BF.hasEHRanges() && BF.isSplit())
    Trampolines = createEHTrampolines(BF);

  // Check the new size to see if it's worth splitting the function.
  if (BC.isX86() && LayoutUpdated) {
    std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(BF);
    LLVM_DEBUG(dbgs() << "Estimated size for function " << BF
                      << " post-split is <0x" << Twine::utohexstr(HotSize)
                      << ", 0x" << Twine::utohexstr(ColdSize) << ">\n");
    if (alignTo(OriginalHotSize, opts::SplitAlignThreshold) <=
        alignTo(HotSize, opts::SplitAlignThreshold) + opts::SplitThreshold) {
      if (opts::Verbosity >= 2) {
        BC.outs() << "BOLT-INFO: Reversing splitting of function "
                  << formatv("{0}:\n {1:x}, {2:x} -> {3:x}\n", BF, HotSize,
                             ColdSize, OriginalHotSize);
      }

      // Reverse the action of createEHTrampolines(). The trampolines will be
      // placed immediately before the matching destination resulting in no
      // extra code.
      if (PreSplitLayout.size() != BF.size())
        PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines);

      for (BinaryBasicBlock &BB : BF)
        BB.setFragmentNum(FragmentNum::main());
      BF.getLayout().update(PreSplitLayout);
    } else {
      SplitBytesHot += HotSize;
      SplitBytesCold += ColdSize;
    }
  }

  // Fix branches if the splitting decision of the pass after function
  // reordering is different from that of the pass before function reordering.
  if (LayoutUpdated && BC.HasFinalizedFunctionOrder)
    BF.fixBranches();
}

SplitFunctions::TrampolineSetType
SplitFunctions::createEHTrampolines(BinaryFunction &BF) const {
  const auto &MIB = BF.getBinaryContext().MIB;

  // Map real landing pads to the corresponding trampolines.
  TrampolineSetType LPTrampolines;

  // Iterate over a copy of the basic blocks since we are adding new blocks to
  // the function, which will invalidate its iterators.
  std::vector<BinaryBasicBlock *> Blocks(BF.pbegin(), BF.pend());
  for (BinaryBasicBlock *BB : Blocks) {
    for (MCInst &Instr : *BB) {
      const std::optional<MCPlus::MCLandingPad> EHInfo = MIB->getEHInfo(Instr);
      if (!EHInfo || !EHInfo->first)
        continue;

      const MCSymbol *LPLabel = EHInfo->first;
      BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(LPLabel);
      if (BB->getFragmentNum() == LPBlock->getFragmentNum())
        continue;

      const MCSymbol *TrampolineLabel = nullptr;
      const TrampolineKey Key(BB->getFragmentNum(), LPLabel);
      auto Iter = LPTrampolines.find(Key);
      if (Iter != LPTrampolines.end()) {
        TrampolineLabel = Iter->second;
      } else {
        // Create a trampoline basic block in the same fragment as the thrower.
        // Note: there's no need to insert the jump instruction, it will be
        // added by fixBranches().
        BinaryBasicBlock *TrampolineBB = BF.addBasicBlock();
        TrampolineBB->setFragmentNum(BB->getFragmentNum());
        TrampolineBB->setExecutionCount(LPBlock->getExecutionCount());
        TrampolineBB->addSuccessor(LPBlock, TrampolineBB->getExecutionCount());
        TrampolineBB->setCFIState(LPBlock->getCFIState());
        TrampolineLabel = TrampolineBB->getLabel();
        LPTrampolines.insert(std::make_pair(Key, TrampolineLabel));
      }

      // Substitute the landing pad with the trampoline.
      MIB->updateEHInfo(Instr,
                        MCPlus::MCLandingPad(TrampolineLabel, EHInfo->second));
    }
  }

  if (LPTrampolines.empty())
    return LPTrampolines;

  // All trampoline blocks were added to the end of the function. Place them at
  // the end of corresponding fragments.
  BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(),
                                                BF.getLayout().block_end());
  stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) {
    return A->getFragmentNum() < B->getFragmentNum();
  });
  BF.getLayout().update(NewLayout);

  // Conservatively introduce branch instructions.
  BF.fixBranches();

  // Update the exception-handling CFG for the function.
  BF.recomputeLandingPads();

  return LPTrampolines;
}

SplitFunctions::BasicBlockOrderType SplitFunctions::mergeEHTrampolines(
    BinaryFunction &BF, SplitFunctions::BasicBlockOrderType &Layout,
    const SplitFunctions::TrampolineSetType &Trampolines) const {
  DenseMap<const MCSymbol *, SmallVector<const MCSymbol *, 0>>
      IncomingTrampolines;
  for (const auto &Entry : Trampolines) {
    IncomingTrampolines[Entry.getFirst().Target].emplace_back(
        Entry.getSecond());
  }

  BasicBlockOrderType MergedLayout;
  for (BinaryBasicBlock *BB : Layout) {
    auto Iter = IncomingTrampolines.find(BB->getLabel());
    if (Iter != IncomingTrampolines.end()) {
      for (const MCSymbol *const Trampoline : Iter->getSecond()) {
        BinaryBasicBlock *LPBlock = BF.getBasicBlockForLabel(Trampoline);
        assert(LPBlock && "Could not find matching landing pad block.");
        MergedLayout.push_back(LPBlock);
      }
    }
    MergedLayout.push_back(BB);
  }

  return MergedLayout;
}

} // namespace bolt
} // namespace llvm