1//===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AlignerPass class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/Aligner.h"
14#include "bolt/Core/ParallelUtilities.h"
15
16#define DEBUG_TYPE "bolt-aligner"
17
18using namespace llvm;
19
20namespace opts {
21
22extern cl::OptionCategory BoltOptCategory;
23
24extern cl::opt<bool> AlignBlocks;
25extern cl::opt<bool> PreserveBlocksAlignment;
26extern cl::opt<unsigned> AlignFunctions;
27
28cl::opt<unsigned>
29AlignBlocksMinSize("align-blocks-min-size",
30 cl::desc("minimal size of the basic block that should be aligned"),
31 cl::init(Val: 0),
32 cl::ZeroOrMore,
33 cl::Hidden,
34 cl::cat(BoltOptCategory));
35
36cl::opt<unsigned> AlignBlocksThreshold(
37 "align-blocks-threshold",
38 cl::desc(
39 "align only blocks with frequency larger than containing function "
40 "execution frequency specified in percent. E.g. 1000 means aligning "
41 "blocks that are 10 times more frequently executed than the "
42 "containing function."),
43 cl::init(Val: 800), cl::Hidden, cl::cat(BoltOptCategory));
44
45cl::opt<unsigned> AlignFunctionsMaxBytes(
46 "align-functions-max-bytes",
47 cl::desc("maximum number of bytes to use to align functions"), cl::init(Val: 32),
48 cl::cat(BoltOptCategory));
49
50cl::opt<unsigned>
51BlockAlignment("block-alignment",
52 cl::desc("boundary to use for alignment of basic blocks"),
53 cl::init(Val: 16),
54 cl::ZeroOrMore,
55 cl::cat(BoltOptCategory));
56
57cl::opt<bool>
58 UseCompactAligner("use-compact-aligner",
59 cl::desc("Use compact approach for aligning functions"),
60 cl::init(Val: true), cl::cat(BoltOptCategory));
61
62} // end namespace opts
63
64namespace llvm {
65namespace bolt {
66
67// Align function to the specified byte-boundary (typically, 64) offsetting
68// the fuction by not more than the corresponding value
69static void alignMaxBytes(BinaryFunction &Function) {
70 Function.setAlignment(opts::AlignFunctions);
71 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
72 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
73}
74
75// Align function to the specified byte-boundary (typically, 64) offsetting
76// the fuction by not more than the minimum over
77// -- the size of the function
78// -- the specified number of bytes
79static void alignCompact(BinaryFunction &Function,
80 const MCCodeEmitter *Emitter) {
81 const BinaryContext &BC = Function.getBinaryContext();
82 size_t HotSize = 0;
83 size_t ColdSize = 0;
84
85 for (const BinaryBasicBlock &BB : Function)
86 if (BB.isSplit())
87 ColdSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter);
88 else
89 HotSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter);
90
91 Function.setAlignment(opts::AlignFunctions);
92 if (HotSize > 0)
93 Function.setMaxAlignmentBytes(
94 std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: HotSize));
95
96 // using the same option, max-align-bytes, both for cold and hot parts of the
97 // functions, as aligning cold functions typically does not affect performance
98 if (ColdSize > 0)
99 Function.setMaxColdAlignmentBytes(
100 std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: ColdSize));
101}
102
103void AlignerPass::alignBlocks(BinaryFunction &Function,
104 const MCCodeEmitter *Emitter) {
105 if (!Function.hasValidProfile() || !Function.isSimple())
106 return;
107
108 const BinaryContext &BC = Function.getBinaryContext();
109
110 const uint64_t FuncCount =
111 std::max<uint64_t>(a: 1, b: Function.getKnownExecutionCount());
112 BinaryBasicBlock *PrevBB = nullptr;
113 for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
114 uint64_t Count = BB->getKnownExecutionCount();
115
116 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
117 PrevBB = BB;
118 continue;
119 }
120
121 uint64_t FTCount = 0;
122 if (PrevBB && PrevBB->getFallthrough() == BB)
123 FTCount = PrevBB->getBranchInfo(Succ: *BB).Count;
124
125 PrevBB = BB;
126
127 if (Count < FTCount * 2)
128 continue;
129
130 const uint64_t BlockSize =
131 BC.computeCodeSize(Beg: BB->begin(), End: BB->end(), Emitter);
132 const uint64_t BytesToUse =
133 std::min<uint64_t>(a: opts::BlockAlignment - 1, b: BlockSize);
134
135 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
136 continue;
137
138 BB->setAlignment(opts::BlockAlignment);
139 BB->setAlignmentMaxBytes(BytesToUse);
140
141 // Update stats.
142 LLVM_DEBUG(
143 std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx);
144 AlignHistogram[BytesToUse]++;
145 AlignedBlocksCount += BB->getKnownExecutionCount();
146 );
147 }
148}
149
150Error AlignerPass::runOnFunctions(BinaryContext &BC) {
151 if (!BC.HasRelocations)
152 return Error::success();
153
154 AlignHistogram.resize(new_size: opts::BlockAlignment);
155
156 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
157 // Create a separate MCCodeEmitter to allow lock free execution
158 BinaryContext::IndependentCodeEmitter Emitter =
159 BC.createIndependentMCCodeEmitter();
160
161 if (opts::UseCompactAligner)
162 alignCompact(Function&: BF, Emitter: Emitter.MCE.get());
163 else
164 alignMaxBytes(Function&: BF);
165
166 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
167 alignBlocks(Function&: BF, Emitter: Emitter.MCE.get());
168 };
169
170 ParallelUtilities::runOnEachFunction(
171 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun,
172 SkipPredicate: ParallelUtilities::PredicateTy(nullptr), LogName: "AlignerPass");
173
174 LLVM_DEBUG(
175 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
176 for (unsigned I = 1; I < AlignHistogram.size(); ++I)
177 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
178
179 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
180 << AlignedBlocksCount << '\n';
181 );
182 return Error::success();
183}
184
185} // end namespace bolt
186} // end namespace llvm
187

source code of bolt/lib/Passes/Aligner.cpp