1//===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The set of optimization/analysis passes that run on BinaryFunctions.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef BOLT_PASSES_BINARY_PASSES_H
14#define BOLT_PASSES_BINARY_PASSES_H
15
16#include "bolt/Core/BinaryContext.h"
17#include "bolt/Core/BinaryFunction.h"
18#include "bolt/Core/DynoStats.h"
19#include "bolt/Profile/BoltAddressTranslation.h"
20#include "llvm/Support/CommandLine.h"
21#include <atomic>
22#include <set>
23#include <string>
24#include <unordered_set>
25
26namespace llvm {
27namespace bolt {
28
29/// An optimization/analysis pass that runs on functions.
30class BinaryFunctionPass {
31protected:
32 bool PrintPass;
33
34 explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {}
35
36 /// Control whether a specific function should be skipped during
37 /// optimization.
38 virtual bool shouldOptimize(const BinaryFunction &BF) const;
39
40public:
41 virtual ~BinaryFunctionPass() = default;
42
43 /// The name of this pass
44 virtual const char *getName() const = 0;
45
46 /// Control whether debug info is printed after this pass is completed.
47 bool printPass() const { return PrintPass; }
48
49 /// Control whether debug info is printed for an individual function after
50 /// this pass is completed (printPass() must have returned true).
51 virtual bool shouldPrint(const BinaryFunction &BF) const;
52
53 virtual Error runOnFunctions(BinaryContext &BC) = 0;
54};
55
56/// A pass to set initial program-wide dynostats.
57class DynoStatsSetPass : public BinaryFunctionPass {
58public:
59 DynoStatsSetPass() : BinaryFunctionPass(false) {}
60
61 const char *getName() const override {
62 return "set dyno-stats before optimizations";
63 }
64
65 bool shouldPrint(const BinaryFunction &BF) const override { return false; }
66
67 Error runOnFunctions(BinaryContext &BC) override {
68 BC.InitialDynoStats = getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64());
69 return Error::success();
70 }
71};
72
73/// A pass to print program-wide dynostats.
74class DynoStatsPrintPass : public BinaryFunctionPass {
75protected:
76 std::string Title;
77
78public:
79 DynoStatsPrintPass(const char *Title)
80 : BinaryFunctionPass(false), Title(Title) {}
81
82 const char *getName() const override {
83 return "print dyno-stats after optimizations";
84 }
85
86 bool shouldPrint(const BinaryFunction &BF) const override { return false; }
87
88 Error runOnFunctions(BinaryContext &BC) override {
89 const DynoStats PrevDynoStats = BC.InitialDynoStats;
90 const DynoStats NewDynoStats =
91 getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64());
92 const bool Changed = (NewDynoStats != PrevDynoStats);
93 BC.outs() << "BOLT-INFO: program-wide dynostats " << Title
94 << (Changed ? "" : " (no change)") << ":\n\n"
95 << PrevDynoStats;
96 if (Changed) {
97 BC.outs() << '\n';
98 NewDynoStats.print(OS&: BC.outs(), Other: &PrevDynoStats, Printer: BC.InstPrinter.get());
99 }
100 BC.outs() << '\n';
101 return Error::success();
102 }
103};
104
105/// The pass normalizes CFG by performing the following transformations:
106/// * removes empty basic blocks
107/// * merges duplicate edges and updates jump instructions
108class NormalizeCFG : public BinaryFunctionPass {
109 std::atomic<uint64_t> NumBlocksRemoved{0};
110 std::atomic<uint64_t> NumDuplicateEdgesMerged{0};
111
112 void runOnFunction(BinaryFunction &BF);
113
114public:
115 NormalizeCFG(const cl::opt<bool> &PrintPass)
116 : BinaryFunctionPass(PrintPass) {}
117
118 const char *getName() const override { return "normalize CFG"; }
119
120 Error runOnFunctions(BinaryContext &) override;
121};
122
123/// Detect and eliminate unreachable basic blocks. We could have those
124/// filled with nops and they are used for alignment.
125class EliminateUnreachableBlocks : public BinaryFunctionPass {
126 std::unordered_set<const BinaryFunction *> Modified;
127 std::atomic<unsigned> DeletedBlocks{0};
128 std::atomic<uint64_t> DeletedBytes{0};
129 void runOnFunction(BinaryFunction &Function);
130
131public:
132 EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
133 : BinaryFunctionPass(PrintPass) {}
134
135 const char *getName() const override { return "eliminate-unreachable"; }
136 bool shouldPrint(const BinaryFunction &BF) const override {
137 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
138 }
139 Error runOnFunctions(BinaryContext &) override;
140};
141
142// Reorder the basic blocks for each function based on hotness.
143class ReorderBasicBlocks : public BinaryFunctionPass {
144public:
145 /// Choose which strategy should the block layout heuristic prioritize when
146 /// facing conflicting goals.
147 enum LayoutType : char {
148 /// LT_NONE - do not change layout of basic blocks
149 LT_NONE = 0, /// no reordering
150 /// LT_REVERSE - reverse the order of basic blocks, meant for testing
151 /// purposes. The first basic block is left intact and the rest are
152 /// put in the reverse order.
153 LT_REVERSE,
154 /// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
155 LT_OPTIMIZE,
156 /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
157 /// paper (PLDI '90) about block reordering, trying to minimize branch
158 /// mispredictions.
159 LT_OPTIMIZE_BRANCH,
160 /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
161 /// that suggests putting frequently executed chains first in the layout.
162 LT_OPTIMIZE_CACHE,
163 // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation.
164 LT_OPTIMIZE_CACHE_PLUS,
165 /// Block reordering guided by the extended TSP metric.
166 LT_OPTIMIZE_EXT_TSP,
167 /// Create clusters and use random order for them.
168 LT_OPTIMIZE_SHUFFLE,
169 };
170
171private:
172 /// Run the specified layout algorithm on the given function. Returns `true`
173 /// if the order of blocks was changed.
174 bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type,
175 bool MinBranchClusters) const;
176
177public:
178 explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
179 : BinaryFunctionPass(PrintPass) {}
180
181 bool shouldOptimize(const BinaryFunction &BF) const override;
182
183 const char *getName() const override { return "reorder-blocks"; }
184 bool shouldPrint(const BinaryFunction &BF) const override;
185 Error runOnFunctions(BinaryContext &BC) override;
186};
187
188/// Sync local branches with CFG.
189class FixupBranches : public BinaryFunctionPass {
190public:
191 explicit FixupBranches(const cl::opt<bool> &PrintPass)
192 : BinaryFunctionPass(PrintPass) {}
193
194 const char *getName() const override { return "fix-branches"; }
195 Error runOnFunctions(BinaryContext &BC) override;
196};
197
198/// Fix the CFI state and exception handling information after all other
199/// passes have completed.
200class FinalizeFunctions : public BinaryFunctionPass {
201public:
202 explicit FinalizeFunctions(const cl::opt<bool> &PrintPass)
203 : BinaryFunctionPass(PrintPass) {}
204
205 const char *getName() const override { return "finalize-functions"; }
206 Error runOnFunctions(BinaryContext &BC) override;
207};
208
209/// Perform any necessary adjustments for functions that do not fit into their
210/// original space in non-relocation mode.
211class CheckLargeFunctions : public BinaryFunctionPass {
212public:
213 explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass)
214 : BinaryFunctionPass(PrintPass) {}
215
216 const char *getName() const override { return "check-large-functions"; }
217
218 Error runOnFunctions(BinaryContext &BC) override;
219
220 bool shouldOptimize(const BinaryFunction &BF) const override;
221};
222
223/// Convert and remove all BOLT-related annotations before LLVM code emission.
224class LowerAnnotations : public BinaryFunctionPass {
225public:
226 explicit LowerAnnotations(const cl::opt<bool> &PrintPass)
227 : BinaryFunctionPass(PrintPass) {}
228
229 const char *getName() const override { return "lower-annotations"; }
230 Error runOnFunctions(BinaryContext &BC) override;
231};
232
233/// Clean the state of the MC representation before sending it to emission
234class CleanMCState : public BinaryFunctionPass {
235public:
236 explicit CleanMCState(const cl::opt<bool> &PrintPass)
237 : BinaryFunctionPass(PrintPass) {}
238
239 const char *getName() const override { return "clean-mc-state"; }
240 Error runOnFunctions(BinaryContext &BC) override;
241};
242
243/// An optimization to simplify conditional tail calls by removing
244/// unnecessary branches.
245///
246/// This optimization considers both of the following cases:
247///
248/// foo: ...
249/// jcc L1 original
250/// ...
251/// L1: jmp bar # TAILJMP
252///
253/// ->
254///
255/// foo: ...
256/// jcc bar iff jcc L1 is expected
257/// ...
258///
259/// L1 is unreachable
260///
261/// OR
262///
263/// foo: ...
264/// jcc L2
265/// L1: jmp dest # TAILJMP
266/// L2: ...
267///
268/// ->
269///
270/// foo: jncc dest # TAILJMP
271/// L2: ...
272///
273/// L1 is unreachable
274///
275/// For this particular case, the first basic block ends with
276/// a conditional branch and has two successors, one fall-through
277/// and one for when the condition is true.
278/// The target of the conditional is a basic block with a single
279/// unconditional branch (i.e. tail call) to another function.
280/// We don't care about the contents of the fall-through block.
281/// We assume that the target of the conditional branch is the
282/// first successor.
283class SimplifyConditionalTailCalls : public BinaryFunctionPass {
284 uint64_t NumCandidateTailCalls{0};
285 uint64_t NumTailCallsPatched{0};
286 uint64_t CTCExecCount{0};
287 uint64_t CTCTakenCount{0};
288 uint64_t NumOrigForwardBranches{0};
289 uint64_t NumOrigBackwardBranches{0};
290 uint64_t NumDoubleJumps{0};
291 uint64_t DeletedBlocks{0};
292 uint64_t DeletedBytes{0};
293 std::unordered_set<const BinaryFunction *> Modified;
294 std::set<const BinaryBasicBlock *> BeenOptimized;
295
296 bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
297 const MCInst &CondBranch, const BinaryBasicBlock *BB,
298 const bool DirectionFlag);
299
300 uint64_t fixTailCalls(BinaryFunction &BF);
301
302public:
303 explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
304 : BinaryFunctionPass(PrintPass) {}
305
306 const char *getName() const override {
307 return "simplify-conditional-tail-calls";
308 }
309 bool shouldPrint(const BinaryFunction &BF) const override {
310 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
311 }
312 Error runOnFunctions(BinaryContext &BC) override;
313};
314
315/// Convert instructions to the form with the minimum operand width.
316class ShortenInstructions : public BinaryFunctionPass {
317 uint64_t shortenInstructions(BinaryFunction &Function);
318
319public:
320 explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
321 : BinaryFunctionPass(PrintPass) {}
322
323 const char *getName() const override { return "shorten-instructions"; }
324
325 Error runOnFunctions(BinaryContext &BC) override;
326};
327
328/// Perform simple peephole optimizations.
329class Peepholes : public BinaryFunctionPass {
330public:
331 enum PeepholeOpts : char {
332 PEEP_NONE = 0x0,
333 PEEP_DOUBLE_JUMPS = 0x2,
334 PEEP_TAILCALL_TRAPS = 0x4,
335 PEEP_USELESS_BRANCHES = 0x8,
336 PEEP_ALL = 0xf
337 };
338
339private:
340 uint64_t NumDoubleJumps{0};
341 uint64_t TailCallTraps{0};
342 uint64_t NumUselessCondBranches{0};
343
344 /// Add trap instructions immediately after indirect tail calls to prevent
345 /// the processor from decoding instructions immediate following the
346 /// tailcall.
347 void addTailcallTraps(BinaryFunction &Function);
348
349 /// Remove useless duplicate successors. When the conditional
350 /// successor is the same as the unconditional successor, we can
351 /// remove the conditional successor and branch instruction.
352 void removeUselessCondBranches(BinaryFunction &Function);
353
354public:
355 explicit Peepholes(const cl::opt<bool> &PrintPass)
356 : BinaryFunctionPass(PrintPass) {}
357
358 const char *getName() const override { return "peepholes"; }
359 Error runOnFunctions(BinaryContext &BC) override;
360};
361
362/// An optimization to simplify loads from read-only sections.The pass converts
363/// load instructions with statically computed target address such as:
364///
365/// mov 0x12f(%rip), %eax
366///
367/// to their counterparts that use immediate operands instead of memory loads:
368///
369/// mov $0x4007dc, %eax
370///
371/// when the target address points somewhere inside a read-only section.
372///
373class SimplifyRODataLoads : public BinaryFunctionPass {
374 uint64_t NumLoadsSimplified{0};
375 uint64_t NumDynamicLoadsSimplified{0};
376 uint64_t NumLoadsFound{0};
377 uint64_t NumDynamicLoadsFound{0};
378 std::unordered_set<const BinaryFunction *> Modified;
379
380 bool simplifyRODataLoads(BinaryFunction &BF);
381
382public:
383 explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
384 : BinaryFunctionPass(PrintPass) {}
385
386 const char *getName() const override { return "simplify-read-only-loads"; }
387 bool shouldPrint(const BinaryFunction &BF) const override {
388 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
389 }
390 Error runOnFunctions(BinaryContext &BC) override;
391};
392
393/// Assign output sections to all functions.
394class AssignSections : public BinaryFunctionPass {
395public:
396 explicit AssignSections() : BinaryFunctionPass(false) {}
397
398 const char *getName() const override { return "assign-sections"; }
399 Error runOnFunctions(BinaryContext &BC) override;
400};
401
402/// Compute and report to the user the imbalance in flow equations for all
403/// CFGs, so we can detect bad quality profile. Prints average and standard
404/// deviation of the absolute differences of outgoing flow minus incoming flow
405/// for blocks of interest (excluding prologues, epilogues, and BB frequency
406/// lower than 100).
407class PrintProfileStats : public BinaryFunctionPass {
408public:
409 explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
410 : BinaryFunctionPass(PrintPass) {}
411
412 const char *getName() const override { return "profile-stats"; }
413 bool shouldPrint(const BinaryFunction &) const override { return false; }
414 Error runOnFunctions(BinaryContext &BC) override;
415};
416
417/// Prints a list of the top 100 functions sorted by a set of
418/// dyno stats categories.
419class PrintProgramStats : public BinaryFunctionPass {
420 BoltAddressTranslation *BAT = nullptr;
421
422public:
423 explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr)
424 : BinaryFunctionPass(false), BAT(BAT) {}
425
426 const char *getName() const override { return "print-stats"; }
427 bool shouldPrint(const BinaryFunction &) const override { return false; }
428 Error runOnFunctions(BinaryContext &BC) override;
429};
430
431/// Pass for lowering any instructions that we have raised and that have
432/// to be lowered.
433class InstructionLowering : public BinaryFunctionPass {
434public:
435 explicit InstructionLowering(const cl::opt<bool> &PrintPass)
436 : BinaryFunctionPass(PrintPass) {}
437
438 const char *getName() const override { return "inst-lowering"; }
439
440 Error runOnFunctions(BinaryContext &BC) override;
441};
442
443/// Pass for stripping 'repz' from 'repz retq' sequence of instructions.
444class StripRepRet : public BinaryFunctionPass {
445public:
446 explicit StripRepRet(const cl::opt<bool> &PrintPass)
447 : BinaryFunctionPass(PrintPass) {}
448
449 const char *getName() const override { return "strip-rep-ret"; }
450
451 Error runOnFunctions(BinaryContext &BC) override;
452};
453
454/// Pass for inlining calls to memcpy using 'rep movsb' on X86.
455class InlineMemcpy : public BinaryFunctionPass {
456public:
457 explicit InlineMemcpy(const cl::opt<bool> &PrintPass)
458 : BinaryFunctionPass(PrintPass) {}
459
460 const char *getName() const override { return "inline-memcpy"; }
461
462 Error runOnFunctions(BinaryContext &BC) override;
463};
464
465/// Pass for specializing memcpy for a size of 1 byte.
466class SpecializeMemcpy1 : public BinaryFunctionPass {
467private:
468 std::vector<std::string> Spec;
469
470 /// Return indices of the call sites to optimize. Count starts at 1.
471 /// Returns an empty set for all call sites in the function.
472 std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const;
473
474public:
475 explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass,
476 cl::list<std::string> &Spec)
477 : BinaryFunctionPass(PrintPass), Spec(Spec) {}
478
479 bool shouldOptimize(const BinaryFunction &BF) const override;
480
481 const char *getName() const override { return "specialize-memcpy"; }
482
483 Error runOnFunctions(BinaryContext &BC) override;
484};
485
486/// Pass to remove nops in code
487class RemoveNops : public BinaryFunctionPass {
488 void runOnFunction(BinaryFunction &Function);
489
490public:
491 explicit RemoveNops(const cl::opt<bool> &PrintPass)
492 : BinaryFunctionPass(PrintPass) {}
493
494 const char *getName() const override { return "remove-nops"; }
495
496 /// Pass entry point
497 Error runOnFunctions(BinaryContext &BC) override;
498};
499
500enum FrameOptimizationType : char {
501 FOP_NONE, /// Don't perform FOP.
502 FOP_HOT, /// Perform FOP on hot functions.
503 FOP_ALL /// Perform FOP on all functions.
504};
505
506} // namespace bolt
507} // namespace llvm
508
509#endif
510

source code of bolt/include/bolt/Passes/BinaryPasses.h