1//===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// The set of optimization/analysis passes that run on BinaryFunctions.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef BOLT_PASSES_BINARY_PASSES_H
14#define BOLT_PASSES_BINARY_PASSES_H
15
16#include "bolt/Core/BinaryContext.h"
17#include "bolt/Core/BinaryFunction.h"
18#include "bolt/Core/DynoStats.h"
19#include "llvm/Support/CommandLine.h"
20#include <atomic>
21#include <set>
22#include <string>
23#include <unordered_set>
24
25namespace llvm {
26namespace bolt {
27
28/// An optimization/analysis pass that runs on functions.
29class BinaryFunctionPass {
30protected:
31 bool PrintPass;
32
33 explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {}
34
35 /// Control whether a specific function should be skipped during
36 /// optimization.
37 virtual bool shouldOptimize(const BinaryFunction &BF) const;
38
39public:
40 virtual ~BinaryFunctionPass() = default;
41
42 /// The name of this pass
43 virtual const char *getName() const = 0;
44
45 /// Control whether debug info is printed after this pass is completed.
46 bool printPass() const { return PrintPass; }
47
48 /// Control whether debug info is printed for an individual function after
49 /// this pass is completed (printPass() must have returned true).
50 virtual bool shouldPrint(const BinaryFunction &BF) const;
51
52 virtual Error runOnFunctions(BinaryContext &BC) = 0;
53};
54
55/// A pass to print program-wide dynostats.
56class DynoStatsPrintPass : public BinaryFunctionPass {
57protected:
58 DynoStats PrevDynoStats;
59 std::string Title;
60
61public:
62 DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title)
63 : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {}
64
65 const char *getName() const override {
66 return "print dyno-stats after optimizations";
67 }
68
69 bool shouldPrint(const BinaryFunction &BF) const override { return false; }
70
71 Error runOnFunctions(BinaryContext &BC) override {
72 const DynoStats NewDynoStats =
73 getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64());
74 const bool Changed = (NewDynoStats != PrevDynoStats);
75 BC.outs() << "BOLT-INFO: program-wide dynostats " << Title
76 << (Changed ? "" : " (no change)") << ":\n\n"
77 << PrevDynoStats;
78 if (Changed) {
79 BC.outs() << '\n';
80 NewDynoStats.print(OS&: BC.outs(), Other: &PrevDynoStats, Printer: BC.InstPrinter.get());
81 }
82 BC.outs() << '\n';
83 return Error::success();
84 }
85};
86
87/// The pass normalizes CFG by performing the following transformations:
88/// * removes empty basic blocks
89/// * merges duplicate edges and updates jump instructions
90class NormalizeCFG : public BinaryFunctionPass {
91 std::atomic<uint64_t> NumBlocksRemoved{0};
92 std::atomic<uint64_t> NumDuplicateEdgesMerged{0};
93
94 void runOnFunction(BinaryFunction &BF);
95
96public:
97 NormalizeCFG(const cl::opt<bool> &PrintPass)
98 : BinaryFunctionPass(PrintPass) {}
99
100 const char *getName() const override { return "normalize CFG"; }
101
102 Error runOnFunctions(BinaryContext &) override;
103};
104
105/// Detect and eliminate unreachable basic blocks. We could have those
106/// filled with nops and they are used for alignment.
107class EliminateUnreachableBlocks : public BinaryFunctionPass {
108 std::unordered_set<const BinaryFunction *> Modified;
109 std::atomic<unsigned> DeletedBlocks{0};
110 std::atomic<uint64_t> DeletedBytes{0};
111 void runOnFunction(BinaryFunction &Function);
112
113public:
114 EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass)
115 : BinaryFunctionPass(PrintPass) {}
116
117 const char *getName() const override { return "eliminate-unreachable"; }
118 bool shouldPrint(const BinaryFunction &BF) const override {
119 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
120 }
121 Error runOnFunctions(BinaryContext &) override;
122};
123
124// Reorder the basic blocks for each function based on hotness.
125class ReorderBasicBlocks : public BinaryFunctionPass {
126public:
127 /// Choose which strategy should the block layout heuristic prioritize when
128 /// facing conflicting goals.
129 enum LayoutType : char {
130 /// LT_NONE - do not change layout of basic blocks
131 LT_NONE = 0, /// no reordering
132 /// LT_REVERSE - reverse the order of basic blocks, meant for testing
133 /// purposes. The first basic block is left intact and the rest are
134 /// put in the reverse order.
135 LT_REVERSE,
136 /// LT_OPTIMIZE - optimize layout of basic blocks based on profile.
137 LT_OPTIMIZE,
138 /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis'
139 /// paper (PLDI '90) about block reordering, trying to minimize branch
140 /// mispredictions.
141 LT_OPTIMIZE_BRANCH,
142 /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04)
143 /// that suggests putting frequently executed chains first in the layout.
144 LT_OPTIMIZE_CACHE,
145 // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation.
146 LT_OPTIMIZE_CACHE_PLUS,
147 /// Block reordering guided by the extended TSP metric.
148 LT_OPTIMIZE_EXT_TSP,
149 /// Create clusters and use random order for them.
150 LT_OPTIMIZE_SHUFFLE,
151 };
152
153private:
154 /// Run the specified layout algorithm on the given function. Returns `true`
155 /// if the order of blocks was changed.
156 bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type,
157 bool MinBranchClusters) const;
158
159public:
160 explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass)
161 : BinaryFunctionPass(PrintPass) {}
162
163 bool shouldOptimize(const BinaryFunction &BF) const override;
164
165 const char *getName() const override { return "reorder-blocks"; }
166 bool shouldPrint(const BinaryFunction &BF) const override;
167 Error runOnFunctions(BinaryContext &BC) override;
168};
169
170/// Sync local branches with CFG.
171class FixupBranches : public BinaryFunctionPass {
172public:
173 explicit FixupBranches(const cl::opt<bool> &PrintPass)
174 : BinaryFunctionPass(PrintPass) {}
175
176 const char *getName() const override { return "fix-branches"; }
177 Error runOnFunctions(BinaryContext &BC) override;
178};
179
180/// Fix the CFI state and exception handling information after all other
181/// passes have completed.
182class FinalizeFunctions : public BinaryFunctionPass {
183public:
184 explicit FinalizeFunctions(const cl::opt<bool> &PrintPass)
185 : BinaryFunctionPass(PrintPass) {}
186
187 const char *getName() const override { return "finalize-functions"; }
188 Error runOnFunctions(BinaryContext &BC) override;
189};
190
191/// Perform any necessary adjustments for functions that do not fit into their
192/// original space in non-relocation mode.
193class CheckLargeFunctions : public BinaryFunctionPass {
194public:
195 explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass)
196 : BinaryFunctionPass(PrintPass) {}
197
198 const char *getName() const override { return "check-large-functions"; }
199
200 Error runOnFunctions(BinaryContext &BC) override;
201
202 bool shouldOptimize(const BinaryFunction &BF) const override;
203};
204
205/// Convert and remove all BOLT-related annotations before LLVM code emission.
206class LowerAnnotations : public BinaryFunctionPass {
207public:
208 explicit LowerAnnotations(const cl::opt<bool> &PrintPass)
209 : BinaryFunctionPass(PrintPass) {}
210
211 const char *getName() const override { return "lower-annotations"; }
212 Error runOnFunctions(BinaryContext &BC) override;
213};
214
215/// Clean the state of the MC representation before sending it to emission
216class CleanMCState : public BinaryFunctionPass {
217public:
218 explicit CleanMCState(const cl::opt<bool> &PrintPass)
219 : BinaryFunctionPass(PrintPass) {}
220
221 const char *getName() const override { return "clean-mc-state"; }
222 Error runOnFunctions(BinaryContext &BC) override;
223};
224
225/// An optimization to simplify conditional tail calls by removing
226/// unnecessary branches.
227///
228/// This optimization considers both of the following cases:
229///
230/// foo: ...
231/// jcc L1 original
232/// ...
233/// L1: jmp bar # TAILJMP
234///
235/// ->
236///
237/// foo: ...
238/// jcc bar iff jcc L1 is expected
239/// ...
240///
241/// L1 is unreachable
242///
243/// OR
244///
245/// foo: ...
246/// jcc L2
247/// L1: jmp dest # TAILJMP
248/// L2: ...
249///
250/// ->
251///
252/// foo: jncc dest # TAILJMP
253/// L2: ...
254///
255/// L1 is unreachable
256///
257/// For this particular case, the first basic block ends with
258/// a conditional branch and has two successors, one fall-through
259/// and one for when the condition is true.
260/// The target of the conditional is a basic block with a single
261/// unconditional branch (i.e. tail call) to another function.
262/// We don't care about the contents of the fall-through block.
263/// We assume that the target of the conditional branch is the
264/// first successor.
265class SimplifyConditionalTailCalls : public BinaryFunctionPass {
266 uint64_t NumCandidateTailCalls{0};
267 uint64_t NumTailCallsPatched{0};
268 uint64_t CTCExecCount{0};
269 uint64_t CTCTakenCount{0};
270 uint64_t NumOrigForwardBranches{0};
271 uint64_t NumOrigBackwardBranches{0};
272 uint64_t NumDoubleJumps{0};
273 uint64_t DeletedBlocks{0};
274 uint64_t DeletedBytes{0};
275 std::unordered_set<const BinaryFunction *> Modified;
276 std::set<const BinaryBasicBlock *> BeenOptimized;
277
278 bool shouldRewriteBranch(const BinaryBasicBlock *PredBB,
279 const MCInst &CondBranch, const BinaryBasicBlock *BB,
280 const bool DirectionFlag);
281
282 uint64_t fixTailCalls(BinaryFunction &BF);
283
284public:
285 explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass)
286 : BinaryFunctionPass(PrintPass) {}
287
288 const char *getName() const override {
289 return "simplify-conditional-tail-calls";
290 }
291 bool shouldPrint(const BinaryFunction &BF) const override {
292 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
293 }
294 Error runOnFunctions(BinaryContext &BC) override;
295};
296
297/// Convert instructions to the form with the minimum operand width.
298class ShortenInstructions : public BinaryFunctionPass {
299 uint64_t shortenInstructions(BinaryFunction &Function);
300
301public:
302 explicit ShortenInstructions(const cl::opt<bool> &PrintPass)
303 : BinaryFunctionPass(PrintPass) {}
304
305 const char *getName() const override { return "shorten-instructions"; }
306
307 Error runOnFunctions(BinaryContext &BC) override;
308};
309
310/// Perform simple peephole optimizations.
311class Peepholes : public BinaryFunctionPass {
312public:
313 enum PeepholeOpts : char {
314 PEEP_NONE = 0x0,
315 PEEP_DOUBLE_JUMPS = 0x2,
316 PEEP_TAILCALL_TRAPS = 0x4,
317 PEEP_USELESS_BRANCHES = 0x8,
318 PEEP_ALL = 0xf
319 };
320
321private:
322 uint64_t NumDoubleJumps{0};
323 uint64_t TailCallTraps{0};
324 uint64_t NumUselessCondBranches{0};
325
326 /// Add trap instructions immediately after indirect tail calls to prevent
327 /// the processor from decoding instructions immediate following the
328 /// tailcall.
329 void addTailcallTraps(BinaryFunction &Function);
330
331 /// Remove useless duplicate successors. When the conditional
332 /// successor is the same as the unconditional successor, we can
333 /// remove the conditional successor and branch instruction.
334 void removeUselessCondBranches(BinaryFunction &Function);
335
336public:
337 explicit Peepholes(const cl::opt<bool> &PrintPass)
338 : BinaryFunctionPass(PrintPass) {}
339
340 const char *getName() const override { return "peepholes"; }
341 Error runOnFunctions(BinaryContext &BC) override;
342};
343
344/// An optimization to simplify loads from read-only sections.The pass converts
345/// load instructions with statically computed target address such as:
346///
347/// mov 0x12f(%rip), %eax
348///
349/// to their counterparts that use immediate operands instead of memory loads:
350///
351/// mov $0x4007dc, %eax
352///
353/// when the target address points somewhere inside a read-only section.
354///
355class SimplifyRODataLoads : public BinaryFunctionPass {
356 uint64_t NumLoadsSimplified{0};
357 uint64_t NumDynamicLoadsSimplified{0};
358 uint64_t NumLoadsFound{0};
359 uint64_t NumDynamicLoadsFound{0};
360 std::unordered_set<const BinaryFunction *> Modified;
361
362 bool simplifyRODataLoads(BinaryFunction &BF);
363
364public:
365 explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass)
366 : BinaryFunctionPass(PrintPass) {}
367
368 const char *getName() const override { return "simplify-read-only-loads"; }
369 bool shouldPrint(const BinaryFunction &BF) const override {
370 return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0;
371 }
372 Error runOnFunctions(BinaryContext &BC) override;
373};
374
375/// Assign output sections to all functions.
376class AssignSections : public BinaryFunctionPass {
377public:
378 explicit AssignSections() : BinaryFunctionPass(false) {}
379
380 const char *getName() const override { return "assign-sections"; }
381 Error runOnFunctions(BinaryContext &BC) override;
382};
383
384/// Compute and report to the user the imbalance in flow equations for all
385/// CFGs, so we can detect bad quality profile. Prints average and standard
386/// deviation of the absolute differences of outgoing flow minus incoming flow
387/// for blocks of interest (excluding prologues, epilogues, and BB frequency
388/// lower than 100).
389class PrintProfileStats : public BinaryFunctionPass {
390public:
391 explicit PrintProfileStats(const cl::opt<bool> &PrintPass)
392 : BinaryFunctionPass(PrintPass) {}
393
394 const char *getName() const override { return "profile-stats"; }
395 bool shouldPrint(const BinaryFunction &) const override { return false; }
396 Error runOnFunctions(BinaryContext &BC) override;
397};
398
399/// Prints a list of the top 100 functions sorted by a set of
400/// dyno stats categories.
401class PrintProgramStats : public BinaryFunctionPass {
402public:
403 explicit PrintProgramStats(const cl::opt<bool> &PrintPass)
404 : BinaryFunctionPass(PrintPass) {}
405
406 const char *getName() const override { return "print-stats"; }
407 bool shouldPrint(const BinaryFunction &) const override { return false; }
408 Error runOnFunctions(BinaryContext &BC) override;
409};
410
411/// Pass for lowering any instructions that we have raised and that have
412/// to be lowered.
413class InstructionLowering : public BinaryFunctionPass {
414public:
415 explicit InstructionLowering(const cl::opt<bool> &PrintPass)
416 : BinaryFunctionPass(PrintPass) {}
417
418 const char *getName() const override { return "inst-lowering"; }
419
420 Error runOnFunctions(BinaryContext &BC) override;
421};
422
423/// Pass for stripping 'repz' from 'repz retq' sequence of instructions.
424class StripRepRet : public BinaryFunctionPass {
425public:
426 explicit StripRepRet(const cl::opt<bool> &PrintPass)
427 : BinaryFunctionPass(PrintPass) {}
428
429 const char *getName() const override { return "strip-rep-ret"; }
430
431 Error runOnFunctions(BinaryContext &BC) override;
432};
433
434/// Pass for inlining calls to memcpy using 'rep movsb' on X86.
435class InlineMemcpy : public BinaryFunctionPass {
436public:
437 explicit InlineMemcpy(const cl::opt<bool> &PrintPass)
438 : BinaryFunctionPass(PrintPass) {}
439
440 const char *getName() const override { return "inline-memcpy"; }
441
442 Error runOnFunctions(BinaryContext &BC) override;
443};
444
445/// Pass for specializing memcpy for a size of 1 byte.
446class SpecializeMemcpy1 : public BinaryFunctionPass {
447private:
448 std::vector<std::string> Spec;
449
450 /// Return indices of the call sites to optimize. Count starts at 1.
451 /// Returns an empty set for all call sites in the function.
452 std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const;
453
454public:
455 explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass,
456 cl::list<std::string> &Spec)
457 : BinaryFunctionPass(PrintPass), Spec(Spec) {}
458
459 bool shouldOptimize(const BinaryFunction &BF) const override;
460
461 const char *getName() const override { return "specialize-memcpy"; }
462
463 Error runOnFunctions(BinaryContext &BC) override;
464};
465
466/// Pass to remove nops in code
467class RemoveNops : public BinaryFunctionPass {
468 void runOnFunction(BinaryFunction &Function);
469
470public:
471 explicit RemoveNops(const cl::opt<bool> &PrintPass)
472 : BinaryFunctionPass(PrintPass) {}
473
474 const char *getName() const override { return "remove-nops"; }
475
476 /// Pass entry point
477 Error runOnFunctions(BinaryContext &BC) override;
478};
479
480enum FrameOptimizationType : char {
481 FOP_NONE, /// Don't perform FOP.
482 FOP_HOT, /// Perform FOP on hot functions.
483 FOP_ALL /// Perform FOP on all functions.
484};
485
486} // namespace bolt
487} // namespace llvm
488
489#endif
490

source code of bolt/include/bolt/Passes/BinaryPasses.h