1 | //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AlignerPass class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/Aligner.h" |
14 | #include "bolt/Core/ParallelUtilities.h" |
15 | |
16 | #define DEBUG_TYPE "bolt-aligner" |
17 | |
18 | using namespace llvm; |
19 | |
20 | namespace opts { |
21 | |
22 | extern cl::OptionCategory BoltOptCategory; |
23 | |
24 | extern cl::opt<bool> AlignBlocks; |
25 | extern cl::opt<bool> PreserveBlocksAlignment; |
26 | extern cl::opt<unsigned> AlignFunctions; |
27 | |
28 | cl::opt<unsigned> |
29 | AlignBlocksMinSize("align-blocks-min-size" , |
30 | cl::desc("minimal size of the basic block that should be aligned" ), |
31 | cl::init(Val: 0), |
32 | cl::ZeroOrMore, |
33 | cl::Hidden, |
34 | cl::cat(BoltOptCategory)); |
35 | |
36 | cl::opt<unsigned> AlignBlocksThreshold( |
37 | "align-blocks-threshold" , |
38 | cl::desc( |
39 | "align only blocks with frequency larger than containing function " |
40 | "execution frequency specified in percent. E.g. 1000 means aligning " |
41 | "blocks that are 10 times more frequently executed than the " |
42 | "containing function." ), |
43 | cl::init(Val: 800), cl::Hidden, cl::cat(BoltOptCategory)); |
44 | |
45 | cl::opt<unsigned> AlignFunctionsMaxBytes( |
46 | "align-functions-max-bytes" , |
47 | cl::desc("maximum number of bytes to use to align functions" ), cl::init(Val: 32), |
48 | cl::cat(BoltOptCategory)); |
49 | |
50 | cl::opt<unsigned> |
51 | BlockAlignment("block-alignment" , |
52 | cl::desc("boundary to use for alignment of basic blocks" ), |
53 | cl::init(Val: 16), |
54 | cl::ZeroOrMore, |
55 | cl::cat(BoltOptCategory)); |
56 | |
57 | cl::opt<bool> |
58 | UseCompactAligner("use-compact-aligner" , |
59 | cl::desc("Use compact approach for aligning functions" ), |
60 | cl::init(Val: true), cl::cat(BoltOptCategory)); |
61 | |
62 | } // end namespace opts |
63 | |
64 | namespace llvm { |
65 | namespace bolt { |
66 | |
67 | // Align function to the specified byte-boundary (typically, 64) offsetting |
68 | // the fuction by not more than the corresponding value |
69 | static void alignMaxBytes(BinaryFunction &Function) { |
70 | Function.setAlignment(opts::AlignFunctions); |
71 | Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); |
72 | Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); |
73 | } |
74 | |
75 | // Align function to the specified byte-boundary (typically, 64) offsetting |
76 | // the fuction by not more than the minimum over |
77 | // -- the size of the function |
78 | // -- the specified number of bytes |
79 | static void alignCompact(BinaryFunction &Function, |
80 | const MCCodeEmitter *Emitter) { |
81 | const BinaryContext &BC = Function.getBinaryContext(); |
82 | size_t HotSize = 0; |
83 | size_t ColdSize = 0; |
84 | |
85 | for (const BinaryBasicBlock &BB : Function) |
86 | if (BB.isSplit()) |
87 | ColdSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter); |
88 | else |
89 | HotSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter); |
90 | |
91 | Function.setAlignment(opts::AlignFunctions); |
92 | if (HotSize > 0) |
93 | Function.setMaxAlignmentBytes( |
94 | std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: HotSize)); |
95 | |
96 | // using the same option, max-align-bytes, both for cold and hot parts of the |
97 | // functions, as aligning cold functions typically does not affect performance |
98 | if (ColdSize > 0) |
99 | Function.setMaxColdAlignmentBytes( |
100 | std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: ColdSize)); |
101 | } |
102 | |
103 | void AlignerPass::alignBlocks(BinaryFunction &Function, |
104 | const MCCodeEmitter *Emitter) { |
105 | if (!Function.hasValidProfile() || !Function.isSimple()) |
106 | return; |
107 | |
108 | const BinaryContext &BC = Function.getBinaryContext(); |
109 | |
110 | const uint64_t FuncCount = |
111 | std::max<uint64_t>(a: 1, b: Function.getKnownExecutionCount()); |
112 | BinaryBasicBlock *PrevBB = nullptr; |
113 | for (BinaryBasicBlock *BB : Function.getLayout().blocks()) { |
114 | uint64_t Count = BB->getKnownExecutionCount(); |
115 | |
116 | if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { |
117 | PrevBB = BB; |
118 | continue; |
119 | } |
120 | |
121 | uint64_t FTCount = 0; |
122 | if (PrevBB && PrevBB->getFallthrough() == BB) |
123 | FTCount = PrevBB->getBranchInfo(Succ: *BB).Count; |
124 | |
125 | PrevBB = BB; |
126 | |
127 | if (Count < FTCount * 2) |
128 | continue; |
129 | |
130 | const uint64_t BlockSize = |
131 | BC.computeCodeSize(Beg: BB->begin(), End: BB->end(), Emitter); |
132 | const uint64_t BytesToUse = |
133 | std::min<uint64_t>(a: opts::BlockAlignment - 1, b: BlockSize); |
134 | |
135 | if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) |
136 | continue; |
137 | |
138 | BB->setAlignment(opts::BlockAlignment); |
139 | BB->setAlignmentMaxBytes(BytesToUse); |
140 | |
141 | // Update stats. |
142 | LLVM_DEBUG( |
143 | std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx); |
144 | AlignHistogram[BytesToUse]++; |
145 | AlignedBlocksCount += BB->getKnownExecutionCount(); |
146 | ); |
147 | } |
148 | } |
149 | |
150 | Error AlignerPass::runOnFunctions(BinaryContext &BC) { |
151 | if (!BC.HasRelocations) |
152 | return Error::success(); |
153 | |
154 | AlignHistogram.resize(new_size: opts::BlockAlignment); |
155 | |
156 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
157 | // Create a separate MCCodeEmitter to allow lock free execution |
158 | BinaryContext::IndependentCodeEmitter Emitter = |
159 | BC.createIndependentMCCodeEmitter(); |
160 | |
161 | if (opts::UseCompactAligner) |
162 | alignCompact(Function&: BF, Emitter: Emitter.MCE.get()); |
163 | else |
164 | alignMaxBytes(Function&: BF); |
165 | |
166 | if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) |
167 | alignBlocks(Function&: BF, Emitter: Emitter.MCE.get()); |
168 | }; |
169 | |
170 | ParallelUtilities::runOnEachFunction( |
171 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, |
172 | SkipPredicate: ParallelUtilities::PredicateTy(nullptr), LogName: "AlignerPass" ); |
173 | |
174 | LLVM_DEBUG( |
175 | dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n" ; |
176 | for (unsigned I = 1; I < AlignHistogram.size(); ++I) |
177 | dbgs() << " " << I << " : " << AlignHistogram[I] << '\n'; |
178 | |
179 | dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: " |
180 | << AlignedBlocksCount << '\n'; |
181 | ); |
182 | return Error::success(); |
183 | } |
184 | |
185 | } // end namespace bolt |
186 | } // end namespace llvm |
187 | |