1//===- bolt/Passes/Aligner.cpp - Pass for optimal code alignment ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AlignerPass class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "bolt/Passes/Aligner.h"
14#include "bolt/Core/ParallelUtilities.h"
15
16#define DEBUG_TYPE "bolt-aligner"
17
18using namespace llvm;
19
20namespace opts {
21
22extern cl::OptionCategory BoltOptCategory;
23
24extern cl::opt<bool> AlignBlocks;
25extern cl::opt<bool> PreserveBlocksAlignment;
26extern cl::opt<unsigned> AlignFunctions;
27
28static cl::opt<unsigned> AlignBlocksMinSize(
29 "align-blocks-min-size",
30 cl::desc("minimal size of the basic block that should be aligned"),
31 cl::init(Val: 0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
32
33static cl::opt<unsigned> AlignBlocksThreshold(
34 "align-blocks-threshold",
35 cl::desc(
36 "align only blocks with frequency larger than containing function "
37 "execution frequency specified in percent. E.g. 1000 means aligning "
38 "blocks that are 10 times more frequently executed than the "
39 "containing function."),
40 cl::init(Val: 800), cl::Hidden, cl::cat(BoltOptCategory));
41
42static cl::opt<unsigned> AlignFunctionsMaxBytes(
43 "align-functions-max-bytes",
44 cl::desc("maximum number of bytes to use to align functions"), cl::init(Val: 32),
45 cl::cat(BoltOptCategory));
46
47static cl::opt<unsigned>
48 BlockAlignment("block-alignment",
49 cl::desc("boundary to use for alignment of basic blocks"),
50 cl::init(Val: 16), cl::ZeroOrMore, cl::cat(BoltOptCategory));
51
52static cl::opt<bool>
53 UseCompactAligner("use-compact-aligner",
54 cl::desc("Use compact approach for aligning functions"),
55 cl::init(Val: true), cl::cat(BoltOptCategory));
56
57} // end namespace opts
58
59namespace llvm {
60namespace bolt {
61
62// Align function to the specified byte-boundary (typically, 64) offsetting
63// the fuction by not more than the corresponding value
64static void alignMaxBytes(BinaryFunction &Function) {
65 Function.setAlignment(opts::AlignFunctions);
66 Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes);
67 Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes);
68}
69
70// Align function to the specified byte-boundary (typically, 64) offsetting
71// the fuction by not more than the minimum over
72// -- the size of the function
73// -- the specified number of bytes
74static void alignCompact(BinaryFunction &Function,
75 const MCCodeEmitter *Emitter) {
76 const BinaryContext &BC = Function.getBinaryContext();
77 size_t HotSize = 0;
78 size_t ColdSize = 0;
79
80 for (const BinaryBasicBlock &BB : Function)
81 if (BB.isSplit())
82 ColdSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter);
83 else
84 HotSize += BC.computeCodeSize(Beg: BB.begin(), End: BB.end(), Emitter);
85
86 Function.setAlignment(opts::AlignFunctions);
87 if (HotSize > 0)
88 Function.setMaxAlignmentBytes(
89 std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: HotSize));
90
91 // using the same option, max-align-bytes, both for cold and hot parts of the
92 // functions, as aligning cold functions typically does not affect performance
93 if (ColdSize > 0)
94 Function.setMaxColdAlignmentBytes(
95 std::min(a: size_t(opts::AlignFunctionsMaxBytes), b: ColdSize));
96}
97
98void AlignerPass::alignBlocks(BinaryFunction &Function,
99 const MCCodeEmitter *Emitter) {
100 if (!Function.hasValidProfile() || !Function.isSimple())
101 return;
102
103 const BinaryContext &BC = Function.getBinaryContext();
104
105 const uint64_t FuncCount =
106 std::max<uint64_t>(a: 1, b: Function.getKnownExecutionCount());
107 BinaryBasicBlock *PrevBB = nullptr;
108 for (BinaryBasicBlock *BB : Function.getLayout().blocks()) {
109 uint64_t Count = BB->getKnownExecutionCount();
110
111 if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) {
112 PrevBB = BB;
113 continue;
114 }
115
116 uint64_t FTCount = 0;
117 if (PrevBB && PrevBB->getFallthrough() == BB)
118 FTCount = PrevBB->getBranchInfo(Succ: *BB).Count;
119
120 PrevBB = BB;
121
122 if (Count < FTCount * 2)
123 continue;
124
125 const uint64_t BlockSize =
126 BC.computeCodeSize(Beg: BB->begin(), End: BB->end(), Emitter);
127 const uint64_t BytesToUse =
128 std::min<uint64_t>(a: opts::BlockAlignment - 1, b: BlockSize);
129
130 if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize)
131 continue;
132
133 BB->setAlignment(opts::BlockAlignment);
134 BB->setAlignmentMaxBytes(BytesToUse);
135
136 // Update stats.
137 LLVM_DEBUG(
138 std::unique_lock<llvm::sys::RWMutex> Lock(AlignHistogramMtx);
139 AlignHistogram[BytesToUse]++;
140 AlignedBlocksCount += BB->getKnownExecutionCount();
141 );
142 }
143}
144
145Error AlignerPass::runOnFunctions(BinaryContext &BC) {
146 if (!BC.HasRelocations)
147 return Error::success();
148
149 AlignHistogram.resize(new_size: opts::BlockAlignment);
150
151 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
152 // Create a separate MCCodeEmitter to allow lock free execution
153 BinaryContext::IndependentCodeEmitter Emitter =
154 BC.createIndependentMCCodeEmitter();
155
156 if (opts::UseCompactAligner)
157 alignCompact(Function&: BF, Emitter: Emitter.MCE.get());
158 else
159 alignMaxBytes(Function&: BF);
160
161 if (opts::AlignBlocks && !opts::PreserveBlocksAlignment)
162 alignBlocks(Function&: BF, Emitter: Emitter.MCE.get());
163 };
164
165 ParallelUtilities::runOnEachFunction(
166 BC, SchedPolicy: ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFunction: WorkFun,
167 SkipPredicate: ParallelUtilities::PredicateTy(nullptr), LogName: "AlignerPass");
168
169 LLVM_DEBUG(
170 dbgs() << "BOLT-DEBUG: max bytes per basic block alignment distribution:\n";
171 for (unsigned I = 1; I < AlignHistogram.size(); ++I)
172 dbgs() << " " << I << " : " << AlignHistogram[I] << '\n';
173
174 dbgs() << "BOLT-DEBUG: total execution count of aligned blocks: "
175 << AlignedBlocksCount << '\n';
176 );
177 return Error::success();
178}
179
180} // end namespace bolt
181} // end namespace llvm
182

Provided by KDAB

Privacy Policy
Update your C++ knowledge – Modern C++11/14/17 Training
Find out more

source code of bolt/lib/Passes/Aligner.cpp