1 | //===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements the AlignerPass class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "bolt/Passes/Aligner.h" |
14 | #include "bolt/Core/ParallelUtilities.h" |
15 | |
16 | #define DEBUG_TYPE "bolt-aligner" |
17 | |
18 | using namespace llvm; |
19 | |
20 | namespace opts { |
21 | |
22 | extern cl::OptionCategory BoltOptCategory; |
23 | |
24 | extern cl::opt<bool> AlignBlocks; |
25 | extern cl::opt<bool> PreserveBlocksAlignment; |
26 | extern cl::opt<unsigned> AlignFunctions; |
27 | |
28 | static cl::opt<unsigned> AlignBlocksMinSize( |
29 | "align-blocks-min-size" , |
30 | cl::desc("minimal size of the basic block that should be aligned" ), |
31 | cl::init(Val: 0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); |
32 | |
33 | static cl::opt<unsigned> AlignBlocksThreshold( |
34 | "align-blocks-threshold" , |
35 | cl::desc( |
36 | "align only blocks with frequency larger than containing function " |
37 | "execution frequency specified in percent. E.g. 1000 means aligning " |
38 | "blocks that are 10 times more frequently executed than the " |
39 | "containing function." ), |
40 | cl::init(Val: 800), cl::Hidden, cl::cat(BoltOptCategory)); |
41 | |
42 | static cl::opt<unsigned> AlignFunctionsMaxBytes( |
43 | "align-functions-max-bytes" , |
44 | cl::desc("maximum number of bytes to use to align functions" ), cl::init(Val: 32), |
45 | cl::cat(BoltOptCategory)); |
46 | |
47 | static cl::opt<unsigned> |
48 | BlockAlignment("block-alignment" , |
49 | cl::desc("boundary to use for alignment of basic blocks" ), |
50 | cl::init(Val: 16), cl::ZeroOrMore, cl::cat(BoltOptCategory)); |
51 | |
52 | static cl::opt<bool> |
53 | UseCompactAligner("use-compact-aligner" , |
54 | cl::desc("Use compact approach for aligning functions" ), |
55 | cl::init(Val: true), cl::cat(BoltOptCategory)); |
56 | |
57 | } // end namespace opts |
58 | |
59 | namespace llvm { |
60 | namespace bolt { |
61 | |
62 | // Align function to the specified byte-boundary (typically, 64) offsetting |
63 | // the fuction by not more than the corresponding value |
64 | static void alignMaxBytes(BinaryFunction &Function) { |
65 | Function.setAlignment(opts::AlignFunctions); |
66 | Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); |
67 | Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); |
68 | } |
69 | |
70 | // Align function to the specified byte-boundary (typically, 64) offsetting |
71 | // the fuction by not more than the minimum over |
72 | // -- the size of the function |
73 | // -- the specified number of bytes |
74 | static void alignCompact(BinaryFunction &Function, |
75 | const MCCodeEmitter *Emitter) { |
76 | const BinaryContext &BC = Function.getBinaryContext(); |
77 | size_t HotSize = 0; |
78 | size_t ColdSize = 0; |
79 | |
80 | for (const BinaryBasicBlock &BB : Function) |
81 | if (BB.isSplit()) |
82 | ColdSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter); |
83 | else |
84 | HotSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter); |
85 | |
86 | Function.setAlignment(opts::AlignFunctions); |
87 | if (HotSize > 0) |
88 | Function.setMaxAlignmentBytes( |
89 | std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: HotSize)); |
90 | |
91 | // using the same option, max-align-bytes, both for cold and hot parts of the |
92 | // functions, as aligning cold functions typically does not affect performance |
93 | if (ColdSize > 0) |
94 | Function.setMaxColdAlignmentBytes( |
95 | std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: ColdSize)); |
96 | } |
97 | |
98 | void AlignerPass::alignBlocks(BinaryFunction &Function, |
99 | const MCCodeEmitter *Emitter) { |
100 | if (!Function.hasValidProfile() || !Function.isSimple()) |
101 | return; |
102 | |
103 | const BinaryContext &BC = Function.getBinaryContext(); |
104 | |
105 | const uint64_t FuncCount = |
106 | std::max<uint64_t>(a: 1, b: Function.getKnownExecutionCount()); |
107 | BinaryBasicBlock *PrevBB = nullptr; |
108 | for (BinaryBasicBlock *BB : Function.getLayout().blocks()) { |
109 | uint64_t Count = BB->getKnownExecutionCount(); |
110 | |
111 | if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { |
112 | PrevBB = BB; |
113 | continue; |
114 | } |
115 | |
116 | uint64_t FTCount = 0; |
117 | if (PrevBB && PrevBB->getFallthrough() == BB) |
118 | FTCount = PrevBB->getBranchInfo(Succ: *BB).Count; |
119 | |
120 | PrevBB = BB; |
121 | |
122 | if (Count < FTCount * 2) |
123 | continue; |
124 | |
125 | const uint64_t BlockSize = |
126 | BC.computeCodeSize(Beg: BB->begin(), End: BB->end(), Emitter); |
127 | const uint64_t BytesToUse = |
128 | std::min<uint64_t>(a: opts::BlockAlignment - 1, b: BlockSize); |
129 | |
130 | if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) |
131 | continue; |
132 | |
133 | BB->setAlignment(opts::BlockAlignment); |
134 | BB->setAlignmentMaxBytes(BytesToUse); |
135 | |
136 | // Update stats. |
137 | LLVM_DEBUG( |
138 | std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx); |
139 | AlignHistogram[BytesToUse]++; |
140 | AlignedBlocksCount += BB->getKnownExecutionCount(); |
141 | ); |
142 | } |
143 | } |
144 | |
145 | Error AlignerPass::runOnFunctions(BinaryContext &BC) { |
146 | if (!BC.HasRelocations) |
147 | return Error::success(); |
148 | |
149 | AlignHistogram.resize(new_size: opts::BlockAlignment); |
150 | |
151 | ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { |
152 | // Create a separate MCCodeEmitter to allow lock free execution |
153 | BinaryContext::IndependentCodeEmitter Emitter = |
154 | BC.createIndependentMCCodeEmitter(); |
155 | |
156 | if (opts::UseCompactAligner) |
157 | alignCompact(Function&: BF, Emitter: Emitter.MCE.get()); |
158 | else |
159 | alignMaxBytes(Function&: BF); |
160 | |
161 | if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) |
162 | alignBlocks(Function&: BF, Emitter: Emitter.MCE.get()); |
163 | }; |
164 | |
165 | ParallelUtilities::runOnEachFunction( |
166 | BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun, |
167 | SkipPredicate: ParallelUtilities::PredicateTy(nullptr), LogName: "AlignerPass" ); |
168 | |
169 | LLVM_DEBUG( |
170 | dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n" ; |
171 | for (unsigned I = 1; I < AlignHistogram.size(); ++I) |
172 | dbgs() << " " << I << " : " << AlignHistogram[I] << '\n'; |
173 | |
174 | dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: " |
175 | << AlignedBlocksCount << '\n'; |
176 | ); |
177 | return Error::success(); |
178 | } |
179 | |
180 | } // end namespace bolt |
181 | } // end namespace llvm |
182 | |