1 | //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The set of optimization/analysis passes that run on BinaryFunctions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef BOLT_PASSES_BINARY_PASSES_H |
14 | #define BOLT_PASSES_BINARY_PASSES_H |
15 | |
16 | #include "bolt/Core/BinaryContext.h" |
17 | #include "bolt/Core/BinaryFunction.h" |
18 | #include "bolt/Core/DynoStats.h" |
19 | #include "bolt/Profile/BoltAddressTranslation.h" |
20 | #include "llvm/Support/CommandLine.h" |
21 | #include <atomic> |
22 | #include <set> |
23 | #include <string> |
24 | #include <unordered_set> |
25 | |
26 | namespace llvm { |
27 | namespace bolt { |
28 | |
29 | /// An optimization/analysis pass that runs on functions. |
30 | class BinaryFunctionPass { |
31 | protected: |
32 | bool PrintPass; |
33 | |
34 | explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {} |
35 | |
36 | /// Control whether a specific function should be skipped during |
37 | /// optimization. |
38 | virtual bool shouldOptimize(const BinaryFunction &BF) const; |
39 | |
40 | public: |
41 | virtual ~BinaryFunctionPass() = default; |
42 | |
43 | /// The name of this pass |
44 | virtual const char *getName() const = 0; |
45 | |
46 | /// Control whether debug info is printed after this pass is completed. |
47 | bool printPass() const { return PrintPass; } |
48 | |
49 | /// Control whether debug info is printed for an individual function after |
50 | /// this pass is completed (printPass() must have returned true). |
51 | virtual bool shouldPrint(const BinaryFunction &BF) const; |
52 | |
53 | virtual Error runOnFunctions(BinaryContext &BC) = 0; |
54 | }; |
55 | |
56 | /// A pass to set initial program-wide dynostats. |
57 | class DynoStatsSetPass : public BinaryFunctionPass { |
58 | public: |
59 | DynoStatsSetPass() : BinaryFunctionPass(false) {} |
60 | |
61 | const char *getName() const override { |
62 | return "set dyno-stats before optimizations" ; |
63 | } |
64 | |
65 | bool shouldPrint(const BinaryFunction &BF) const override { return false; } |
66 | |
67 | Error runOnFunctions(BinaryContext &BC) override { |
68 | BC.InitialDynoStats = getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64()); |
69 | return Error::success(); |
70 | } |
71 | }; |
72 | |
73 | /// A pass to print program-wide dynostats. |
74 | class DynoStatsPrintPass : public BinaryFunctionPass { |
75 | protected: |
76 | std::string Title; |
77 | |
78 | public: |
79 | DynoStatsPrintPass(const char *Title) |
80 | : BinaryFunctionPass(false), Title(Title) {} |
81 | |
82 | const char *getName() const override { |
83 | return "print dyno-stats after optimizations" ; |
84 | } |
85 | |
86 | bool shouldPrint(const BinaryFunction &BF) const override { return false; } |
87 | |
88 | Error runOnFunctions(BinaryContext &BC) override { |
89 | const DynoStats PrevDynoStats = BC.InitialDynoStats; |
90 | const DynoStats NewDynoStats = |
91 | getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64()); |
92 | const bool Changed = (NewDynoStats != PrevDynoStats); |
93 | BC.outs() << "BOLT-INFO: program-wide dynostats " << Title |
94 | << (Changed ? "" : " (no change)" ) << ":\n\n" |
95 | << PrevDynoStats; |
96 | if (Changed) { |
97 | BC.outs() << '\n'; |
98 | NewDynoStats.print(OS&: BC.outs(), Other: &PrevDynoStats, Printer: BC.InstPrinter.get()); |
99 | } |
100 | BC.outs() << '\n'; |
101 | return Error::success(); |
102 | } |
103 | }; |
104 | |
105 | /// The pass normalizes CFG by performing the following transformations: |
106 | /// * removes empty basic blocks |
107 | /// * merges duplicate edges and updates jump instructions |
108 | class NormalizeCFG : public BinaryFunctionPass { |
109 | std::atomic<uint64_t> NumBlocksRemoved{0}; |
110 | std::atomic<uint64_t> NumDuplicateEdgesMerged{0}; |
111 | |
112 | void runOnFunction(BinaryFunction &BF); |
113 | |
114 | public: |
115 | NormalizeCFG(const cl::opt<bool> &PrintPass) |
116 | : BinaryFunctionPass(PrintPass) {} |
117 | |
118 | const char *getName() const override { return "normalize CFG" ; } |
119 | |
120 | Error runOnFunctions(BinaryContext &) override; |
121 | }; |
122 | |
123 | /// Detect and eliminate unreachable basic blocks. We could have those |
124 | /// filled with nops and they are used for alignment. |
125 | class EliminateUnreachableBlocks : public BinaryFunctionPass { |
126 | std::unordered_set<const BinaryFunction *> Modified; |
127 | std::atomic<unsigned> DeletedBlocks{0}; |
128 | std::atomic<uint64_t> DeletedBytes{0}; |
129 | void runOnFunction(BinaryFunction &Function); |
130 | |
131 | public: |
132 | EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass) |
133 | : BinaryFunctionPass(PrintPass) {} |
134 | |
135 | const char *getName() const override { return "eliminate-unreachable" ; } |
136 | bool shouldPrint(const BinaryFunction &BF) const override { |
137 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
138 | } |
139 | Error runOnFunctions(BinaryContext &) override; |
140 | }; |
141 | |
142 | // Reorder the basic blocks for each function based on hotness. |
143 | class ReorderBasicBlocks : public BinaryFunctionPass { |
144 | public: |
145 | /// Choose which strategy should the block layout heuristic prioritize when |
146 | /// facing conflicting goals. |
147 | enum LayoutType : char { |
148 | /// LT_NONE - do not change layout of basic blocks |
149 | LT_NONE = 0, /// no reordering |
150 | /// LT_REVERSE - reverse the order of basic blocks, meant for testing |
151 | /// purposes. The first basic block is left intact and the rest are |
152 | /// put in the reverse order. |
153 | LT_REVERSE, |
154 | /// LT_OPTIMIZE - optimize layout of basic blocks based on profile. |
155 | LT_OPTIMIZE, |
156 | /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis' |
157 | /// paper (PLDI '90) about block reordering, trying to minimize branch |
158 | /// mispredictions. |
159 | LT_OPTIMIZE_BRANCH, |
160 | /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04) |
161 | /// that suggests putting frequently executed chains first in the layout. |
162 | LT_OPTIMIZE_CACHE, |
163 | // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation. |
164 | LT_OPTIMIZE_CACHE_PLUS, |
165 | /// Block reordering guided by the extended TSP metric. |
166 | LT_OPTIMIZE_EXT_TSP, |
167 | /// Create clusters and use random order for them. |
168 | LT_OPTIMIZE_SHUFFLE, |
169 | }; |
170 | |
171 | private: |
172 | /// Run the specified layout algorithm on the given function. Returns `true` |
173 | /// if the order of blocks was changed. |
174 | bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, |
175 | bool MinBranchClusters) const; |
176 | |
177 | public: |
178 | explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass) |
179 | : BinaryFunctionPass(PrintPass) {} |
180 | |
181 | bool shouldOptimize(const BinaryFunction &BF) const override; |
182 | |
183 | const char *getName() const override { return "reorder-blocks" ; } |
184 | bool shouldPrint(const BinaryFunction &BF) const override; |
185 | Error runOnFunctions(BinaryContext &BC) override; |
186 | }; |
187 | |
188 | /// Sync local branches with CFG. |
189 | class FixupBranches : public BinaryFunctionPass { |
190 | public: |
191 | explicit FixupBranches(const cl::opt<bool> &PrintPass) |
192 | : BinaryFunctionPass(PrintPass) {} |
193 | |
194 | const char *getName() const override { return "fix-branches" ; } |
195 | Error runOnFunctions(BinaryContext &BC) override; |
196 | }; |
197 | |
198 | /// Fix the CFI state and exception handling information after all other |
199 | /// passes have completed. |
200 | class FinalizeFunctions : public BinaryFunctionPass { |
201 | public: |
202 | explicit FinalizeFunctions(const cl::opt<bool> &PrintPass) |
203 | : BinaryFunctionPass(PrintPass) {} |
204 | |
205 | const char *getName() const override { return "finalize-functions" ; } |
206 | Error runOnFunctions(BinaryContext &BC) override; |
207 | }; |
208 | |
209 | /// Perform any necessary adjustments for functions that do not fit into their |
210 | /// original space in non-relocation mode. |
211 | class CheckLargeFunctions : public BinaryFunctionPass { |
212 | public: |
213 | explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass) |
214 | : BinaryFunctionPass(PrintPass) {} |
215 | |
216 | const char *getName() const override { return "check-large-functions" ; } |
217 | |
218 | Error runOnFunctions(BinaryContext &BC) override; |
219 | |
220 | bool shouldOptimize(const BinaryFunction &BF) const override; |
221 | }; |
222 | |
223 | /// Convert and remove all BOLT-related annotations before LLVM code emission. |
224 | class LowerAnnotations : public BinaryFunctionPass { |
225 | public: |
226 | explicit LowerAnnotations(const cl::opt<bool> &PrintPass) |
227 | : BinaryFunctionPass(PrintPass) {} |
228 | |
229 | const char *getName() const override { return "lower-annotations" ; } |
230 | Error runOnFunctions(BinaryContext &BC) override; |
231 | }; |
232 | |
233 | /// Clean the state of the MC representation before sending it to emission |
234 | class CleanMCState : public BinaryFunctionPass { |
235 | public: |
236 | explicit CleanMCState(const cl::opt<bool> &PrintPass) |
237 | : BinaryFunctionPass(PrintPass) {} |
238 | |
239 | const char *getName() const override { return "clean-mc-state" ; } |
240 | Error runOnFunctions(BinaryContext &BC) override; |
241 | }; |
242 | |
243 | /// An optimization to simplify conditional tail calls by removing |
244 | /// unnecessary branches. |
245 | /// |
246 | /// This optimization considers both of the following cases: |
247 | /// |
248 | /// foo: ... |
249 | /// jcc L1 original |
250 | /// ... |
251 | /// L1: jmp bar # TAILJMP |
252 | /// |
253 | /// -> |
254 | /// |
255 | /// foo: ... |
256 | /// jcc bar iff jcc L1 is expected |
257 | /// ... |
258 | /// |
259 | /// L1 is unreachable |
260 | /// |
261 | /// OR |
262 | /// |
263 | /// foo: ... |
264 | /// jcc L2 |
265 | /// L1: jmp dest # TAILJMP |
266 | /// L2: ... |
267 | /// |
268 | /// -> |
269 | /// |
270 | /// foo: jncc dest # TAILJMP |
271 | /// L2: ... |
272 | /// |
273 | /// L1 is unreachable |
274 | /// |
275 | /// For this particular case, the first basic block ends with |
276 | /// a conditional branch and has two successors, one fall-through |
277 | /// and one for when the condition is true. |
278 | /// The target of the conditional is a basic block with a single |
279 | /// unconditional branch (i.e. tail call) to another function. |
280 | /// We don't care about the contents of the fall-through block. |
281 | /// We assume that the target of the conditional branch is the |
282 | /// first successor. |
283 | class SimplifyConditionalTailCalls : public BinaryFunctionPass { |
284 | uint64_t NumCandidateTailCalls{0}; |
285 | uint64_t NumTailCallsPatched{0}; |
286 | uint64_t CTCExecCount{0}; |
287 | uint64_t CTCTakenCount{0}; |
288 | uint64_t NumOrigForwardBranches{0}; |
289 | uint64_t NumOrigBackwardBranches{0}; |
290 | uint64_t NumDoubleJumps{0}; |
291 | uint64_t DeletedBlocks{0}; |
292 | uint64_t DeletedBytes{0}; |
293 | std::unordered_set<const BinaryFunction *> Modified; |
294 | std::set<const BinaryBasicBlock *> BeenOptimized; |
295 | |
296 | bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, |
297 | const MCInst &CondBranch, const BinaryBasicBlock *BB, |
298 | const bool DirectionFlag); |
299 | |
300 | uint64_t fixTailCalls(BinaryFunction &BF); |
301 | |
302 | public: |
303 | explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass) |
304 | : BinaryFunctionPass(PrintPass) {} |
305 | |
306 | const char *getName() const override { |
307 | return "simplify-conditional-tail-calls" ; |
308 | } |
309 | bool shouldPrint(const BinaryFunction &BF) const override { |
310 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
311 | } |
312 | Error runOnFunctions(BinaryContext &BC) override; |
313 | }; |
314 | |
315 | /// Convert instructions to the form with the minimum operand width. |
316 | class ShortenInstructions : public BinaryFunctionPass { |
317 | uint64_t shortenInstructions(BinaryFunction &Function); |
318 | |
319 | public: |
320 | explicit ShortenInstructions(const cl::opt<bool> &PrintPass) |
321 | : BinaryFunctionPass(PrintPass) {} |
322 | |
323 | const char *getName() const override { return "shorten-instructions" ; } |
324 | |
325 | Error runOnFunctions(BinaryContext &BC) override; |
326 | }; |
327 | |
328 | /// Perform simple peephole optimizations. |
329 | class Peepholes : public BinaryFunctionPass { |
330 | public: |
331 | enum PeepholeOpts : char { |
332 | PEEP_NONE = 0x0, |
333 | PEEP_DOUBLE_JUMPS = 0x2, |
334 | PEEP_TAILCALL_TRAPS = 0x4, |
335 | PEEP_USELESS_BRANCHES = 0x8, |
336 | PEEP_ALL = 0xf |
337 | }; |
338 | |
339 | private: |
340 | uint64_t NumDoubleJumps{0}; |
341 | uint64_t TailCallTraps{0}; |
342 | uint64_t NumUselessCondBranches{0}; |
343 | |
344 | /// Add trap instructions immediately after indirect tail calls to prevent |
345 | /// the processor from decoding instructions immediate following the |
346 | /// tailcall. |
347 | void addTailcallTraps(BinaryFunction &Function); |
348 | |
349 | /// Remove useless duplicate successors. When the conditional |
350 | /// successor is the same as the unconditional successor, we can |
351 | /// remove the conditional successor and branch instruction. |
352 | void removeUselessCondBranches(BinaryFunction &Function); |
353 | |
354 | public: |
355 | explicit Peepholes(const cl::opt<bool> &PrintPass) |
356 | : BinaryFunctionPass(PrintPass) {} |
357 | |
358 | const char *getName() const override { return "peepholes" ; } |
359 | Error runOnFunctions(BinaryContext &BC) override; |
360 | }; |
361 | |
362 | /// An optimization to simplify loads from read-only sections.The pass converts |
363 | /// load instructions with statically computed target address such as: |
364 | /// |
365 | /// mov 0x12f(%rip), %eax |
366 | /// |
367 | /// to their counterparts that use immediate operands instead of memory loads: |
368 | /// |
369 | /// mov $0x4007dc, %eax |
370 | /// |
371 | /// when the target address points somewhere inside a read-only section. |
372 | /// |
373 | class SimplifyRODataLoads : public BinaryFunctionPass { |
374 | uint64_t NumLoadsSimplified{0}; |
375 | uint64_t NumDynamicLoadsSimplified{0}; |
376 | uint64_t NumLoadsFound{0}; |
377 | uint64_t NumDynamicLoadsFound{0}; |
378 | std::unordered_set<const BinaryFunction *> Modified; |
379 | |
380 | bool simplifyRODataLoads(BinaryFunction &BF); |
381 | |
382 | public: |
383 | explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass) |
384 | : BinaryFunctionPass(PrintPass) {} |
385 | |
386 | const char *getName() const override { return "simplify-read-only-loads" ; } |
387 | bool shouldPrint(const BinaryFunction &BF) const override { |
388 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
389 | } |
390 | Error runOnFunctions(BinaryContext &BC) override; |
391 | }; |
392 | |
393 | /// Assign output sections to all functions. |
394 | class AssignSections : public BinaryFunctionPass { |
395 | public: |
396 | explicit AssignSections() : BinaryFunctionPass(false) {} |
397 | |
398 | const char *getName() const override { return "assign-sections" ; } |
399 | Error runOnFunctions(BinaryContext &BC) override; |
400 | }; |
401 | |
402 | /// Compute and report to the user the imbalance in flow equations for all |
403 | /// CFGs, so we can detect bad quality profile. Prints average and standard |
404 | /// deviation of the absolute differences of outgoing flow minus incoming flow |
405 | /// for blocks of interest (excluding prologues, epilogues, and BB frequency |
406 | /// lower than 100). |
407 | class PrintProfileStats : public BinaryFunctionPass { |
408 | public: |
409 | explicit PrintProfileStats(const cl::opt<bool> &PrintPass) |
410 | : BinaryFunctionPass(PrintPass) {} |
411 | |
412 | const char *getName() const override { return "profile-stats" ; } |
413 | bool shouldPrint(const BinaryFunction &) const override { return false; } |
414 | Error runOnFunctions(BinaryContext &BC) override; |
415 | }; |
416 | |
417 | /// Prints a list of the top 100 functions sorted by a set of |
418 | /// dyno stats categories. |
419 | class PrintProgramStats : public BinaryFunctionPass { |
420 | BoltAddressTranslation *BAT = nullptr; |
421 | |
422 | public: |
423 | explicit PrintProgramStats(BoltAddressTranslation *BAT = nullptr) |
424 | : BinaryFunctionPass(false), BAT(BAT) {} |
425 | |
426 | const char *getName() const override { return "print-stats" ; } |
427 | bool shouldPrint(const BinaryFunction &) const override { return false; } |
428 | Error runOnFunctions(BinaryContext &BC) override; |
429 | }; |
430 | |
431 | /// Pass for lowering any instructions that we have raised and that have |
432 | /// to be lowered. |
433 | class InstructionLowering : public BinaryFunctionPass { |
434 | public: |
435 | explicit InstructionLowering(const cl::opt<bool> &PrintPass) |
436 | : BinaryFunctionPass(PrintPass) {} |
437 | |
438 | const char *getName() const override { return "inst-lowering" ; } |
439 | |
440 | Error runOnFunctions(BinaryContext &BC) override; |
441 | }; |
442 | |
443 | /// Pass for stripping 'repz' from 'repz retq' sequence of instructions. |
444 | class StripRepRet : public BinaryFunctionPass { |
445 | public: |
446 | explicit StripRepRet(const cl::opt<bool> &PrintPass) |
447 | : BinaryFunctionPass(PrintPass) {} |
448 | |
449 | const char *getName() const override { return "strip-rep-ret" ; } |
450 | |
451 | Error runOnFunctions(BinaryContext &BC) override; |
452 | }; |
453 | |
454 | /// Pass for inlining calls to memcpy using 'rep movsb' on X86. |
455 | class InlineMemcpy : public BinaryFunctionPass { |
456 | public: |
457 | explicit InlineMemcpy(const cl::opt<bool> &PrintPass) |
458 | : BinaryFunctionPass(PrintPass) {} |
459 | |
460 | const char *getName() const override { return "inline-memcpy" ; } |
461 | |
462 | Error runOnFunctions(BinaryContext &BC) override; |
463 | }; |
464 | |
465 | /// Pass for specializing memcpy for a size of 1 byte. |
466 | class SpecializeMemcpy1 : public BinaryFunctionPass { |
467 | private: |
468 | std::vector<std::string> Spec; |
469 | |
470 | /// Return indices of the call sites to optimize. Count starts at 1. |
471 | /// Returns an empty set for all call sites in the function. |
472 | std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const; |
473 | |
474 | public: |
475 | explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass, |
476 | cl::list<std::string> &Spec) |
477 | : BinaryFunctionPass(PrintPass), Spec(Spec) {} |
478 | |
479 | bool shouldOptimize(const BinaryFunction &BF) const override; |
480 | |
481 | const char *getName() const override { return "specialize-memcpy" ; } |
482 | |
483 | Error runOnFunctions(BinaryContext &BC) override; |
484 | }; |
485 | |
486 | /// Pass to remove nops in code |
487 | class RemoveNops : public BinaryFunctionPass { |
488 | void runOnFunction(BinaryFunction &Function); |
489 | |
490 | public: |
491 | explicit RemoveNops(const cl::opt<bool> &PrintPass) |
492 | : BinaryFunctionPass(PrintPass) {} |
493 | |
494 | const char *getName() const override { return "remove-nops" ; } |
495 | |
496 | /// Pass entry point |
497 | Error runOnFunctions(BinaryContext &BC) override; |
498 | }; |
499 | |
500 | enum FrameOptimizationType : char { |
501 | FOP_NONE, /// Don't perform FOP. |
502 | FOP_HOT, /// Perform FOP on hot functions. |
503 | FOP_ALL /// Perform FOP on all functions. |
504 | }; |
505 | |
506 | } // namespace bolt |
507 | } // namespace llvm |
508 | |
509 | #endif |
510 | |