1 | //===- bolt/Passes/BinaryPasses.h - Binary-level passes ---------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // The set of optimization/analysis passes that run on BinaryFunctions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef BOLT_PASSES_BINARY_PASSES_H |
14 | #define BOLT_PASSES_BINARY_PASSES_H |
15 | |
16 | #include "bolt/Core/BinaryContext.h" |
17 | #include "bolt/Core/BinaryFunction.h" |
18 | #include "bolt/Core/DynoStats.h" |
19 | #include "llvm/Support/CommandLine.h" |
20 | #include <atomic> |
21 | #include <set> |
22 | #include <string> |
23 | #include <unordered_set> |
24 | |
25 | namespace llvm { |
26 | namespace bolt { |
27 | |
28 | /// An optimization/analysis pass that runs on functions. |
29 | class BinaryFunctionPass { |
30 | protected: |
31 | bool PrintPass; |
32 | |
33 | explicit BinaryFunctionPass(const bool PrintPass) : PrintPass(PrintPass) {} |
34 | |
35 | /// Control whether a specific function should be skipped during |
36 | /// optimization. |
37 | virtual bool shouldOptimize(const BinaryFunction &BF) const; |
38 | |
39 | public: |
40 | virtual ~BinaryFunctionPass() = default; |
41 | |
42 | /// The name of this pass |
43 | virtual const char *getName() const = 0; |
44 | |
45 | /// Control whether debug info is printed after this pass is completed. |
46 | bool printPass() const { return PrintPass; } |
47 | |
48 | /// Control whether debug info is printed for an individual function after |
49 | /// this pass is completed (printPass() must have returned true). |
50 | virtual bool shouldPrint(const BinaryFunction &BF) const; |
51 | |
52 | virtual Error runOnFunctions(BinaryContext &BC) = 0; |
53 | }; |
54 | |
55 | /// A pass to print program-wide dynostats. |
56 | class DynoStatsPrintPass : public BinaryFunctionPass { |
57 | protected: |
58 | DynoStats PrevDynoStats; |
59 | std::string Title; |
60 | |
61 | public: |
62 | DynoStatsPrintPass(const DynoStats &PrevDynoStats, const char *Title) |
63 | : BinaryFunctionPass(false), PrevDynoStats(PrevDynoStats), Title(Title) {} |
64 | |
65 | const char *getName() const override { |
66 | return "print dyno-stats after optimizations" ; |
67 | } |
68 | |
69 | bool shouldPrint(const BinaryFunction &BF) const override { return false; } |
70 | |
71 | Error runOnFunctions(BinaryContext &BC) override { |
72 | const DynoStats NewDynoStats = |
73 | getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64()); |
74 | const bool Changed = (NewDynoStats != PrevDynoStats); |
75 | BC.outs() << "BOLT-INFO: program-wide dynostats " << Title |
76 | << (Changed ? "" : " (no change)" ) << ":\n\n" |
77 | << PrevDynoStats; |
78 | if (Changed) { |
79 | BC.outs() << '\n'; |
80 | NewDynoStats.print(OS&: BC.outs(), Other: &PrevDynoStats, Printer: BC.InstPrinter.get()); |
81 | } |
82 | BC.outs() << '\n'; |
83 | return Error::success(); |
84 | } |
85 | }; |
86 | |
87 | /// The pass normalizes CFG by performing the following transformations: |
88 | /// * removes empty basic blocks |
89 | /// * merges duplicate edges and updates jump instructions |
90 | class NormalizeCFG : public BinaryFunctionPass { |
91 | std::atomic<uint64_t> NumBlocksRemoved{0}; |
92 | std::atomic<uint64_t> NumDuplicateEdgesMerged{0}; |
93 | |
94 | void runOnFunction(BinaryFunction &BF); |
95 | |
96 | public: |
97 | NormalizeCFG(const cl::opt<bool> &PrintPass) |
98 | : BinaryFunctionPass(PrintPass) {} |
99 | |
100 | const char *getName() const override { return "normalize CFG" ; } |
101 | |
102 | Error runOnFunctions(BinaryContext &) override; |
103 | }; |
104 | |
105 | /// Detect and eliminate unreachable basic blocks. We could have those |
106 | /// filled with nops and they are used for alignment. |
107 | class EliminateUnreachableBlocks : public BinaryFunctionPass { |
108 | std::unordered_set<const BinaryFunction *> Modified; |
109 | std::atomic<unsigned> DeletedBlocks{0}; |
110 | std::atomic<uint64_t> DeletedBytes{0}; |
111 | void runOnFunction(BinaryFunction &Function); |
112 | |
113 | public: |
114 | EliminateUnreachableBlocks(const cl::opt<bool> &PrintPass) |
115 | : BinaryFunctionPass(PrintPass) {} |
116 | |
117 | const char *getName() const override { return "eliminate-unreachable" ; } |
118 | bool shouldPrint(const BinaryFunction &BF) const override { |
119 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
120 | } |
121 | Error runOnFunctions(BinaryContext &) override; |
122 | }; |
123 | |
124 | // Reorder the basic blocks for each function based on hotness. |
125 | class ReorderBasicBlocks : public BinaryFunctionPass { |
126 | public: |
127 | /// Choose which strategy should the block layout heuristic prioritize when |
128 | /// facing conflicting goals. |
129 | enum LayoutType : char { |
130 | /// LT_NONE - do not change layout of basic blocks |
131 | LT_NONE = 0, /// no reordering |
132 | /// LT_REVERSE - reverse the order of basic blocks, meant for testing |
133 | /// purposes. The first basic block is left intact and the rest are |
134 | /// put in the reverse order. |
135 | LT_REVERSE, |
136 | /// LT_OPTIMIZE - optimize layout of basic blocks based on profile. |
137 | LT_OPTIMIZE, |
138 | /// LT_OPTIMIZE_BRANCH is an implementation of what is suggested in Pettis' |
139 | /// paper (PLDI '90) about block reordering, trying to minimize branch |
140 | /// mispredictions. |
141 | LT_OPTIMIZE_BRANCH, |
142 | /// LT_OPTIMIZE_CACHE piggybacks on the idea from Ispike paper (CGO '04) |
143 | /// that suggests putting frequently executed chains first in the layout. |
144 | LT_OPTIMIZE_CACHE, |
145 | // CACHE_PLUS and EXT_TSP are synonyms, emit warning of deprecation. |
146 | LT_OPTIMIZE_CACHE_PLUS, |
147 | /// Block reordering guided by the extended TSP metric. |
148 | LT_OPTIMIZE_EXT_TSP, |
149 | /// Create clusters and use random order for them. |
150 | LT_OPTIMIZE_SHUFFLE, |
151 | }; |
152 | |
153 | private: |
154 | /// Run the specified layout algorithm on the given function. Returns `true` |
155 | /// if the order of blocks was changed. |
156 | bool modifyFunctionLayout(BinaryFunction &Function, LayoutType Type, |
157 | bool MinBranchClusters) const; |
158 | |
159 | public: |
160 | explicit ReorderBasicBlocks(const cl::opt<bool> &PrintPass) |
161 | : BinaryFunctionPass(PrintPass) {} |
162 | |
163 | bool shouldOptimize(const BinaryFunction &BF) const override; |
164 | |
165 | const char *getName() const override { return "reorder-blocks" ; } |
166 | bool shouldPrint(const BinaryFunction &BF) const override; |
167 | Error runOnFunctions(BinaryContext &BC) override; |
168 | }; |
169 | |
170 | /// Sync local branches with CFG. |
171 | class FixupBranches : public BinaryFunctionPass { |
172 | public: |
173 | explicit FixupBranches(const cl::opt<bool> &PrintPass) |
174 | : BinaryFunctionPass(PrintPass) {} |
175 | |
176 | const char *getName() const override { return "fix-branches" ; } |
177 | Error runOnFunctions(BinaryContext &BC) override; |
178 | }; |
179 | |
180 | /// Fix the CFI state and exception handling information after all other |
181 | /// passes have completed. |
182 | class FinalizeFunctions : public BinaryFunctionPass { |
183 | public: |
184 | explicit FinalizeFunctions(const cl::opt<bool> &PrintPass) |
185 | : BinaryFunctionPass(PrintPass) {} |
186 | |
187 | const char *getName() const override { return "finalize-functions" ; } |
188 | Error runOnFunctions(BinaryContext &BC) override; |
189 | }; |
190 | |
191 | /// Perform any necessary adjustments for functions that do not fit into their |
192 | /// original space in non-relocation mode. |
193 | class CheckLargeFunctions : public BinaryFunctionPass { |
194 | public: |
195 | explicit CheckLargeFunctions(const cl::opt<bool> &PrintPass) |
196 | : BinaryFunctionPass(PrintPass) {} |
197 | |
198 | const char *getName() const override { return "check-large-functions" ; } |
199 | |
200 | Error runOnFunctions(BinaryContext &BC) override; |
201 | |
202 | bool shouldOptimize(const BinaryFunction &BF) const override; |
203 | }; |
204 | |
205 | /// Convert and remove all BOLT-related annotations before LLVM code emission. |
206 | class LowerAnnotations : public BinaryFunctionPass { |
207 | public: |
208 | explicit LowerAnnotations(const cl::opt<bool> &PrintPass) |
209 | : BinaryFunctionPass(PrintPass) {} |
210 | |
211 | const char *getName() const override { return "lower-annotations" ; } |
212 | Error runOnFunctions(BinaryContext &BC) override; |
213 | }; |
214 | |
215 | /// Clean the state of the MC representation before sending it to emission |
216 | class CleanMCState : public BinaryFunctionPass { |
217 | public: |
218 | explicit CleanMCState(const cl::opt<bool> &PrintPass) |
219 | : BinaryFunctionPass(PrintPass) {} |
220 | |
221 | const char *getName() const override { return "clean-mc-state" ; } |
222 | Error runOnFunctions(BinaryContext &BC) override; |
223 | }; |
224 | |
225 | /// An optimization to simplify conditional tail calls by removing |
226 | /// unnecessary branches. |
227 | /// |
228 | /// This optimization considers both of the following cases: |
229 | /// |
230 | /// foo: ... |
231 | /// jcc L1 original |
232 | /// ... |
233 | /// L1: jmp bar # TAILJMP |
234 | /// |
235 | /// -> |
236 | /// |
237 | /// foo: ... |
238 | /// jcc bar iff jcc L1 is expected |
239 | /// ... |
240 | /// |
241 | /// L1 is unreachable |
242 | /// |
243 | /// OR |
244 | /// |
245 | /// foo: ... |
246 | /// jcc L2 |
247 | /// L1: jmp dest # TAILJMP |
248 | /// L2: ... |
249 | /// |
250 | /// -> |
251 | /// |
252 | /// foo: jncc dest # TAILJMP |
253 | /// L2: ... |
254 | /// |
255 | /// L1 is unreachable |
256 | /// |
257 | /// For this particular case, the first basic block ends with |
258 | /// a conditional branch and has two successors, one fall-through |
259 | /// and one for when the condition is true. |
260 | /// The target of the conditional is a basic block with a single |
261 | /// unconditional branch (i.e. tail call) to another function. |
262 | /// We don't care about the contents of the fall-through block. |
263 | /// We assume that the target of the conditional branch is the |
264 | /// first successor. |
265 | class SimplifyConditionalTailCalls : public BinaryFunctionPass { |
266 | uint64_t NumCandidateTailCalls{0}; |
267 | uint64_t NumTailCallsPatched{0}; |
268 | uint64_t CTCExecCount{0}; |
269 | uint64_t CTCTakenCount{0}; |
270 | uint64_t NumOrigForwardBranches{0}; |
271 | uint64_t NumOrigBackwardBranches{0}; |
272 | uint64_t NumDoubleJumps{0}; |
273 | uint64_t DeletedBlocks{0}; |
274 | uint64_t DeletedBytes{0}; |
275 | std::unordered_set<const BinaryFunction *> Modified; |
276 | std::set<const BinaryBasicBlock *> BeenOptimized; |
277 | |
278 | bool shouldRewriteBranch(const BinaryBasicBlock *PredBB, |
279 | const MCInst &CondBranch, const BinaryBasicBlock *BB, |
280 | const bool DirectionFlag); |
281 | |
282 | uint64_t fixTailCalls(BinaryFunction &BF); |
283 | |
284 | public: |
285 | explicit SimplifyConditionalTailCalls(const cl::opt<bool> &PrintPass) |
286 | : BinaryFunctionPass(PrintPass) {} |
287 | |
288 | const char *getName() const override { |
289 | return "simplify-conditional-tail-calls" ; |
290 | } |
291 | bool shouldPrint(const BinaryFunction &BF) const override { |
292 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
293 | } |
294 | Error runOnFunctions(BinaryContext &BC) override; |
295 | }; |
296 | |
297 | /// Convert instructions to the form with the minimum operand width. |
298 | class ShortenInstructions : public BinaryFunctionPass { |
299 | uint64_t shortenInstructions(BinaryFunction &Function); |
300 | |
301 | public: |
302 | explicit ShortenInstructions(const cl::opt<bool> &PrintPass) |
303 | : BinaryFunctionPass(PrintPass) {} |
304 | |
305 | const char *getName() const override { return "shorten-instructions" ; } |
306 | |
307 | Error runOnFunctions(BinaryContext &BC) override; |
308 | }; |
309 | |
310 | /// Perform simple peephole optimizations. |
311 | class Peepholes : public BinaryFunctionPass { |
312 | public: |
313 | enum PeepholeOpts : char { |
314 | PEEP_NONE = 0x0, |
315 | PEEP_DOUBLE_JUMPS = 0x2, |
316 | PEEP_TAILCALL_TRAPS = 0x4, |
317 | PEEP_USELESS_BRANCHES = 0x8, |
318 | PEEP_ALL = 0xf |
319 | }; |
320 | |
321 | private: |
322 | uint64_t NumDoubleJumps{0}; |
323 | uint64_t TailCallTraps{0}; |
324 | uint64_t NumUselessCondBranches{0}; |
325 | |
326 | /// Add trap instructions immediately after indirect tail calls to prevent |
327 | /// the processor from decoding instructions immediate following the |
328 | /// tailcall. |
329 | void addTailcallTraps(BinaryFunction &Function); |
330 | |
331 | /// Remove useless duplicate successors. When the conditional |
332 | /// successor is the same as the unconditional successor, we can |
333 | /// remove the conditional successor and branch instruction. |
334 | void removeUselessCondBranches(BinaryFunction &Function); |
335 | |
336 | public: |
337 | explicit Peepholes(const cl::opt<bool> &PrintPass) |
338 | : BinaryFunctionPass(PrintPass) {} |
339 | |
340 | const char *getName() const override { return "peepholes" ; } |
341 | Error runOnFunctions(BinaryContext &BC) override; |
342 | }; |
343 | |
344 | /// An optimization to simplify loads from read-only sections.The pass converts |
345 | /// load instructions with statically computed target address such as: |
346 | /// |
347 | /// mov 0x12f(%rip), %eax |
348 | /// |
349 | /// to their counterparts that use immediate operands instead of memory loads: |
350 | /// |
351 | /// mov $0x4007dc, %eax |
352 | /// |
353 | /// when the target address points somewhere inside a read-only section. |
354 | /// |
355 | class SimplifyRODataLoads : public BinaryFunctionPass { |
356 | uint64_t NumLoadsSimplified{0}; |
357 | uint64_t NumDynamicLoadsSimplified{0}; |
358 | uint64_t NumLoadsFound{0}; |
359 | uint64_t NumDynamicLoadsFound{0}; |
360 | std::unordered_set<const BinaryFunction *> Modified; |
361 | |
362 | bool simplifyRODataLoads(BinaryFunction &BF); |
363 | |
364 | public: |
365 | explicit SimplifyRODataLoads(const cl::opt<bool> &PrintPass) |
366 | : BinaryFunctionPass(PrintPass) {} |
367 | |
368 | const char *getName() const override { return "simplify-read-only-loads" ; } |
369 | bool shouldPrint(const BinaryFunction &BF) const override { |
370 | return BinaryFunctionPass::shouldPrint(BF) && Modified.count(x: &BF) > 0; |
371 | } |
372 | Error runOnFunctions(BinaryContext &BC) override; |
373 | }; |
374 | |
375 | /// Assign output sections to all functions. |
376 | class AssignSections : public BinaryFunctionPass { |
377 | public: |
378 | explicit AssignSections() : BinaryFunctionPass(false) {} |
379 | |
380 | const char *getName() const override { return "assign-sections" ; } |
381 | Error runOnFunctions(BinaryContext &BC) override; |
382 | }; |
383 | |
384 | /// Compute and report to the user the imbalance in flow equations for all |
385 | /// CFGs, so we can detect bad quality profile. Prints average and standard |
386 | /// deviation of the absolute differences of outgoing flow minus incoming flow |
387 | /// for blocks of interest (excluding prologues, epilogues, and BB frequency |
388 | /// lower than 100). |
389 | class PrintProfileStats : public BinaryFunctionPass { |
390 | public: |
391 | explicit PrintProfileStats(const cl::opt<bool> &PrintPass) |
392 | : BinaryFunctionPass(PrintPass) {} |
393 | |
394 | const char *getName() const override { return "profile-stats" ; } |
395 | bool shouldPrint(const BinaryFunction &) const override { return false; } |
396 | Error runOnFunctions(BinaryContext &BC) override; |
397 | }; |
398 | |
399 | /// Prints a list of the top 100 functions sorted by a set of |
400 | /// dyno stats categories. |
401 | class PrintProgramStats : public BinaryFunctionPass { |
402 | public: |
403 | explicit PrintProgramStats(const cl::opt<bool> &PrintPass) |
404 | : BinaryFunctionPass(PrintPass) {} |
405 | |
406 | const char *getName() const override { return "print-stats" ; } |
407 | bool shouldPrint(const BinaryFunction &) const override { return false; } |
408 | Error runOnFunctions(BinaryContext &BC) override; |
409 | }; |
410 | |
411 | /// Pass for lowering any instructions that we have raised and that have |
412 | /// to be lowered. |
413 | class InstructionLowering : public BinaryFunctionPass { |
414 | public: |
415 | explicit InstructionLowering(const cl::opt<bool> &PrintPass) |
416 | : BinaryFunctionPass(PrintPass) {} |
417 | |
418 | const char *getName() const override { return "inst-lowering" ; } |
419 | |
420 | Error runOnFunctions(BinaryContext &BC) override; |
421 | }; |
422 | |
423 | /// Pass for stripping 'repz' from 'repz retq' sequence of instructions. |
424 | class StripRepRet : public BinaryFunctionPass { |
425 | public: |
426 | explicit StripRepRet(const cl::opt<bool> &PrintPass) |
427 | : BinaryFunctionPass(PrintPass) {} |
428 | |
429 | const char *getName() const override { return "strip-rep-ret" ; } |
430 | |
431 | Error runOnFunctions(BinaryContext &BC) override; |
432 | }; |
433 | |
434 | /// Pass for inlining calls to memcpy using 'rep movsb' on X86. |
435 | class InlineMemcpy : public BinaryFunctionPass { |
436 | public: |
437 | explicit InlineMemcpy(const cl::opt<bool> &PrintPass) |
438 | : BinaryFunctionPass(PrintPass) {} |
439 | |
440 | const char *getName() const override { return "inline-memcpy" ; } |
441 | |
442 | Error runOnFunctions(BinaryContext &BC) override; |
443 | }; |
444 | |
445 | /// Pass for specializing memcpy for a size of 1 byte. |
446 | class SpecializeMemcpy1 : public BinaryFunctionPass { |
447 | private: |
448 | std::vector<std::string> Spec; |
449 | |
450 | /// Return indices of the call sites to optimize. Count starts at 1. |
451 | /// Returns an empty set for all call sites in the function. |
452 | std::set<size_t> getCallSitesToOptimize(const BinaryFunction &) const; |
453 | |
454 | public: |
455 | explicit SpecializeMemcpy1(const cl::opt<bool> &PrintPass, |
456 | cl::list<std::string> &Spec) |
457 | : BinaryFunctionPass(PrintPass), Spec(Spec) {} |
458 | |
459 | bool shouldOptimize(const BinaryFunction &BF) const override; |
460 | |
461 | const char *getName() const override { return "specialize-memcpy" ; } |
462 | |
463 | Error runOnFunctions(BinaryContext &BC) override; |
464 | }; |
465 | |
466 | /// Pass to remove nops in code |
467 | class RemoveNops : public BinaryFunctionPass { |
468 | void runOnFunction(BinaryFunction &Function); |
469 | |
470 | public: |
471 | explicit RemoveNops(const cl::opt<bool> &PrintPass) |
472 | : BinaryFunctionPass(PrintPass) {} |
473 | |
474 | const char *getName() const override { return "remove-nops" ; } |
475 | |
476 | /// Pass entry point |
477 | Error runOnFunctions(BinaryContext &BC) override; |
478 | }; |
479 | |
480 | enum FrameOptimizationType : char { |
481 | FOP_NONE, /// Don't perform FOP. |
482 | FOP_HOT, /// Perform FOP on hot functions. |
483 | FOP_ALL /// Perform FOP on all functions. |
484 | }; |
485 | |
486 | } // namespace bolt |
487 | } // namespace llvm |
488 | |
489 | #endif |
490 | |