1 | //===- bolt/Passes/LoopInversionPass.cpp ----------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the LoopInversionPass class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/LoopInversionPass.h" |
14 | #include "bolt/Core/ParallelUtilities.h" |
15 | |
16 | using namespace llvm; |
17 | |
18 | namespace opts { |
19 | extern cl::OptionCategory BoltCategory; |
20 | |
21 | extern cl::opt<bolt::ReorderBasicBlocks::LayoutType> ReorderBlocks; |
22 | |
23 | static cl::opt<bool> LoopReorder( |
24 | "loop-inversion-opt" , |
25 | cl::desc("reorder unconditional jump instructions in loops optimization" ), |
26 | cl::init(Val: true), cl::cat(BoltCategory), cl::ReallyHidden); |
27 | } // namespace opts |
28 | |
29 | namespace llvm { |
30 | namespace bolt { |
31 | |
32 | bool LoopInversionPass::runOnFunction(BinaryFunction &BF) { |
33 | bool IsChanged = false; |
34 | if (BF.getLayout().block_size() < 3 || !BF.hasValidProfile()) |
35 | return false; |
36 | |
37 | BF.getLayout().updateLayoutIndices(); |
38 | for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { |
39 | if (BB->succ_size() != 1 || BB->pred_size() != 1) |
40 | continue; |
41 | |
42 | BinaryBasicBlock *SuccBB = *BB->succ_begin(); |
43 | BinaryBasicBlock *PredBB = *BB->pred_begin(); |
44 | const unsigned BBIndex = BB->getLayoutIndex(); |
45 | const unsigned SuccBBIndex = SuccBB->getLayoutIndex(); |
46 | if (SuccBB == PredBB && BB != SuccBB && BBIndex != 0 && SuccBBIndex != 0 && |
47 | SuccBB->succ_size() == 2 && |
48 | BB->getFragmentNum() == SuccBB->getFragmentNum()) { |
49 | // Get the second successor (after loop BB) |
50 | BinaryBasicBlock *SecondSucc = nullptr; |
51 | for (BinaryBasicBlock *Succ : SuccBB->successors()) { |
52 | if (Succ != &*BB) { |
53 | SecondSucc = Succ; |
54 | break; |
55 | } |
56 | } |
57 | |
58 | assert(SecondSucc != nullptr && "Unable to find a second BB successor" ); |
59 | const uint64_t LoopCount = SuccBB->getBranchInfo(Succ: *BB).Count; |
60 | const uint64_t ExitCount = SuccBB->getBranchInfo(Succ: *SecondSucc).Count; |
61 | |
62 | if (LoopCount < ExitCount) { |
63 | if (BBIndex > SuccBBIndex) |
64 | continue; |
65 | } else if (BBIndex < SuccBBIndex) { |
66 | continue; |
67 | } |
68 | |
69 | IsChanged = true; |
70 | BB->setLayoutIndex(SuccBBIndex); |
71 | SuccBB->setLayoutIndex(BBIndex); |
72 | } |
73 | } |
74 | |
75 | if (IsChanged) { |
76 | BinaryFunction::BasicBlockOrderType NewOrder(BF.getLayout().block_begin(), |
77 | BF.getLayout().block_end()); |
78 | llvm::sort(C&: NewOrder, Comp: [&](BinaryBasicBlock *BB1, BinaryBasicBlock *BB2) { |
79 | return BB1->getLayoutIndex() < BB2->getLayoutIndex(); |
80 | }); |
81 | BF.getLayout().update(NewLayout: NewOrder); |
82 | } |
83 | |
84 | return IsChanged; |
85 | } |
86 | |
87 | Error LoopInversionPass::runOnFunctions(BinaryContext &BC) { |
88 | std::atomic<uint64_t> ModifiedFuncCount{0}; |
89 | if (opts::ReorderBlocks == ReorderBasicBlocks::LT_NONE || |
90 | opts::LoopReorder == false) |
91 | return Error::success(); |
92 | |
93 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
94 | if (runOnFunction(BF)) |
95 | ++ModifiedFuncCount; |
96 | }; |
97 | |
98 | ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { |
99 | return !shouldOptimize(BF); |
100 | }; |
101 | |
102 | ParallelUtilities::runOnEachFunction( |
103 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, SkipPredicate: SkipFunc, |
104 | LogName: "LoopInversionPass" ); |
105 | |
106 | BC.outs() << "BOLT-INFO: " << ModifiedFuncCount |
107 | << " Functions were reordered by LoopInversionPass\n" ; |
108 | return Error::success(); |
109 | } |
110 | |
111 | } // end namespace bolt |
112 | } // end namespace llvm |
113 | |