1//===- bolt/Rewrite/BinaryPassManager.cpp - Binary-level pass manager -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "bolt/Rewrite/BinaryPassManager.h"
10#include "bolt/Passes/ADRRelaxationPass.h"
11#include "bolt/Passes/Aligner.h"
12#include "bolt/Passes/AllocCombiner.h"
13#include "bolt/Passes/AsmDump.h"
14#include "bolt/Passes/CMOVConversion.h"
15#include "bolt/Passes/FixRISCVCallsPass.h"
16#include "bolt/Passes/FixRelaxationPass.h"
17#include "bolt/Passes/FrameOptimizer.h"
18#include "bolt/Passes/Hugify.h"
19#include "bolt/Passes/IdenticalCodeFolding.h"
20#include "bolt/Passes/IndirectCallPromotion.h"
21#include "bolt/Passes/Inliner.h"
22#include "bolt/Passes/Instrumentation.h"
23#include "bolt/Passes/JTFootprintReduction.h"
24#include "bolt/Passes/LongJmp.h"
25#include "bolt/Passes/LoopInversionPass.h"
26#include "bolt/Passes/PLTCall.h"
27#include "bolt/Passes/PatchEntries.h"
28#include "bolt/Passes/RegReAssign.h"
29#include "bolt/Passes/ReorderData.h"
30#include "bolt/Passes/ReorderFunctions.h"
31#include "bolt/Passes/RetpolineInsertion.h"
32#include "bolt/Passes/SplitFunctions.h"
33#include "bolt/Passes/StokeInfo.h"
34#include "bolt/Passes/TailDuplication.h"
35#include "bolt/Passes/ThreeWayBranch.h"
36#include "bolt/Passes/ValidateInternalCalls.h"
37#include "bolt/Passes/ValidateMemRefs.h"
38#include "bolt/Passes/VeneerElimination.h"
39#include "bolt/Utils/CommandLineOpts.h"
40#include "llvm/Support/FormatVariadic.h"
41#include "llvm/Support/Timer.h"
42#include "llvm/Support/raw_ostream.h"
43#include <memory>
44#include <numeric>
45
46using namespace llvm;
47
48namespace opts {
49
50extern cl::opt<bool> PrintAll;
51extern cl::opt<bool> PrintDynoStats;
52extern cl::opt<bool> DumpDotAll;
53extern cl::opt<std::string> AsmDump;
54extern cl::opt<bolt::PLTCall::OptType> PLT;
55
56static cl::opt<bool>
57DynoStatsAll("dyno-stats-all",
58 cl::desc("print dyno stats after each stage"),
59 cl::ZeroOrMore, cl::Hidden, cl::cat(BoltCategory));
60
61static cl::opt<bool>
62 EliminateUnreachable("eliminate-unreachable",
63 cl::desc("eliminate unreachable code"), cl::init(Val: true),
64 cl::cat(BoltOptCategory));
65
66cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
67 cl::cat(BoltOptCategory));
68
69static cl::opt<bool> JTFootprintReductionFlag(
70 "jt-footprint-reduction",
71 cl::desc("make jump tables size smaller at the cost of using more "
72 "instructions at jump sites"),
73 cl::cat(BoltOptCategory));
74
75cl::opt<bool>
76 KeepNops("keep-nops",
77 cl::desc("keep no-op instructions. By default they are removed."),
78 cl::Hidden, cl::cat(BoltOptCategory));
79
80cl::opt<bool> NeverPrint("never-print", cl::desc("never print"),
81 cl::ReallyHidden, cl::cat(BoltOptCategory));
82
83cl::opt<bool>
84PrintAfterBranchFixup("print-after-branch-fixup",
85 cl::desc("print function after fixing local branches"),
86 cl::Hidden, cl::cat(BoltOptCategory));
87
88static cl::opt<bool>
89PrintAfterLowering("print-after-lowering",
90 cl::desc("print function after instruction lowering"),
91 cl::Hidden, cl::cat(BoltOptCategory));
92
93cl::opt<bool>
94PrintFinalized("print-finalized",
95 cl::desc("print function after CFG is finalized"),
96 cl::Hidden, cl::cat(BoltOptCategory));
97
98static cl::opt<bool>
99 PrintFOP("print-fop",
100 cl::desc("print functions after frame optimizer pass"), cl::Hidden,
101 cl::cat(BoltOptCategory));
102
103static cl::opt<bool>
104 PrintICF("print-icf", cl::desc("print functions after ICF optimization"),
105 cl::Hidden, cl::cat(BoltOptCategory));
106
107static cl::opt<bool>
108 PrintICP("print-icp",
109 cl::desc("print functions after indirect call promotion"),
110 cl::Hidden, cl::cat(BoltOptCategory));
111
112static cl::opt<bool>
113 PrintInline("print-inline",
114 cl::desc("print functions after inlining optimization"),
115 cl::Hidden, cl::cat(BoltOptCategory));
116
117static cl::opt<bool> PrintJTFootprintReduction(
118 "print-after-jt-footprint-reduction",
119 cl::desc("print function after jt-footprint-reduction pass"), cl::Hidden,
120 cl::cat(BoltOptCategory));
121
122static cl::opt<bool>
123 PrintLongJmp("print-longjmp",
124 cl::desc("print functions after longjmp pass"), cl::Hidden,
125 cl::cat(BoltOptCategory));
126
127cl::opt<bool>
128 PrintNormalized("print-normalized",
129 cl::desc("print functions after CFG is normalized"),
130 cl::Hidden, cl::cat(BoltCategory));
131
132static cl::opt<bool> PrintOptimizeBodyless(
133 "print-optimize-bodyless",
134 cl::desc("print functions after bodyless optimization"), cl::Hidden,
135 cl::cat(BoltOptCategory));
136
137static cl::opt<bool>
138 PrintPeepholes("print-peepholes",
139 cl::desc("print functions after peephole optimization"),
140 cl::Hidden, cl::cat(BoltOptCategory));
141
142static cl::opt<bool>
143 PrintPLT("print-plt", cl::desc("print functions after PLT optimization"),
144 cl::Hidden, cl::cat(BoltOptCategory));
145
146static cl::opt<bool>
147 PrintProfileStats("print-profile-stats",
148 cl::desc("print profile quality/bias analysis"),
149 cl::cat(BoltCategory));
150
151static cl::opt<bool>
152 PrintRegReAssign("print-regreassign",
153 cl::desc("print functions after regreassign pass"),
154 cl::Hidden, cl::cat(BoltOptCategory));
155
156cl::opt<bool>
157 PrintReordered("print-reordered",
158 cl::desc("print functions after layout optimization"),
159 cl::Hidden, cl::cat(BoltOptCategory));
160
161static cl::opt<bool>
162 PrintReorderedFunctions("print-reordered-functions",
163 cl::desc("print functions after clustering"),
164 cl::Hidden, cl::cat(BoltOptCategory));
165
166static cl::opt<bool> PrintRetpolineInsertion(
167 "print-retpoline-insertion",
168 cl::desc("print functions after retpoline insertion pass"), cl::Hidden,
169 cl::cat(BoltCategory));
170
171static cl::opt<bool> PrintSCTC(
172 "print-sctc",
173 cl::desc("print functions after conditional tail call simplification"),
174 cl::Hidden, cl::cat(BoltOptCategory));
175
176static cl::opt<bool> PrintSimplifyROLoads(
177 "print-simplify-rodata-loads",
178 cl::desc("print functions after simplification of RO data loads"),
179 cl::Hidden, cl::cat(BoltOptCategory));
180
181static cl::opt<bool>
182 PrintSplit("print-split", cl::desc("print functions after code splitting"),
183 cl::Hidden, cl::cat(BoltOptCategory));
184
185static cl::opt<bool>
186 PrintStoke("print-stoke", cl::desc("print functions after stoke analysis"),
187 cl::Hidden, cl::cat(BoltOptCategory));
188
189static cl::opt<bool>
190 PrintFixRelaxations("print-fix-relaxations",
191 cl::desc("print functions after fix relaxations pass"),
192 cl::Hidden, cl::cat(BoltOptCategory));
193
194static cl::opt<bool>
195 PrintFixRISCVCalls("print-fix-riscv-calls",
196 cl::desc("print functions after fix RISCV calls pass"),
197 cl::Hidden, cl::cat(BoltOptCategory));
198
199static cl::opt<bool> PrintVeneerElimination(
200 "print-veneer-elimination",
201 cl::desc("print functions after veneer elimination pass"), cl::Hidden,
202 cl::cat(BoltOptCategory));
203
204static cl::opt<bool>
205 PrintUCE("print-uce",
206 cl::desc("print functions after unreachable code elimination"),
207 cl::Hidden, cl::cat(BoltOptCategory));
208
209static cl::opt<bool> RegReAssign(
210 "reg-reassign",
211 cl::desc(
212 "reassign registers so as to avoid using REX prefixes in hot code"),
213 cl::cat(BoltOptCategory));
214
215static cl::opt<bool> SimplifyConditionalTailCalls(
216 "simplify-conditional-tail-calls",
217 cl::desc("simplify conditional tail calls by removing unnecessary jumps"),
218 cl::init(Val: true), cl::cat(BoltOptCategory));
219
220static cl::opt<bool> SimplifyRODataLoads(
221 "simplify-rodata-loads",
222 cl::desc("simplify loads from read-only sections by replacing the memory "
223 "operand with the constant found in the corresponding section"),
224 cl::cat(BoltOptCategory));
225
226static cl::list<std::string>
227SpecializeMemcpy1("memcpy1-spec",
228 cl::desc("list of functions with call sites for which to specialize memcpy() "
229 "for size 1"),
230 cl::value_desc("func1,func2:cs1:cs2,func3:cs1,..."),
231 cl::ZeroOrMore, cl::cat(BoltOptCategory));
232
233static cl::opt<bool> Stoke("stoke", cl::desc("turn on the stoke analysis"),
234 cl::cat(BoltOptCategory));
235
236static cl::opt<bool> StringOps(
237 "inline-memcpy",
238 cl::desc("inline memcpy using 'rep movsb' instruction (X86-only)"),
239 cl::cat(BoltOptCategory));
240
241static cl::opt<bool> StripRepRet(
242 "strip-rep-ret",
243 cl::desc("strip 'repz' prefix from 'repz retq' sequence (on by default)"),
244 cl::init(Val: true), cl::cat(BoltOptCategory));
245
246static cl::opt<bool> VerifyCFG("verify-cfg",
247 cl::desc("verify the CFG after every pass"),
248 cl::Hidden, cl::cat(BoltOptCategory));
249
250static cl::opt<bool> ThreeWayBranchFlag("three-way-branch",
251 cl::desc("reorder three way branches"),
252 cl::ReallyHidden,
253 cl::cat(BoltOptCategory));
254
255static cl::opt<bool> CMOVConversionFlag("cmov-conversion",
256 cl::desc("fold jcc+mov into cmov"),
257 cl::ReallyHidden,
258 cl::cat(BoltOptCategory));
259
260} // namespace opts
261
262namespace llvm {
263namespace bolt {
264
265using namespace opts;
266
267const char BinaryFunctionPassManager::TimerGroupName[] = "passman";
268const char BinaryFunctionPassManager::TimerGroupDesc[] =
269 "Binary Function Pass Manager";
270
271Error BinaryFunctionPassManager::runPasses() {
272 auto &BFs = BC.getBinaryFunctions();
273 for (size_t PassIdx = 0; PassIdx < Passes.size(); PassIdx++) {
274 const std::pair<const bool, std::unique_ptr<BinaryFunctionPass>>
275 &OptPassPair = Passes[PassIdx];
276 if (!OptPassPair.first)
277 continue;
278
279 const std::unique_ptr<BinaryFunctionPass> &Pass = OptPassPair.second;
280 std::string PassIdName =
281 formatv(Fmt: "{0:2}_{1}", Vals&: PassIdx, Vals: Pass->getName()).str();
282
283 if (opts::Verbosity > 0)
284 BC.outs() << "BOLT-INFO: Starting pass: " << Pass->getName() << "\n";
285
286 NamedRegionTimer T(Pass->getName(), Pass->getName(), TimerGroupName,
287 TimerGroupDesc, TimeOpts);
288
289 Error E = Error::success();
290 callWithDynoStats(
291 OS&: BC.outs(),
292 Func: [this, &E, &Pass] {
293 E = joinErrors(E1: std::move(E), E2: Pass->runOnFunctions(BC));
294 },
295 Funcs&: BFs, Phase: Pass->getName(), Flag: opts::DynoStatsAll, IsAArch64: BC.isAArch64());
296 if (E)
297 return Error(std::move(E));
298
299 if (opts::VerifyCFG &&
300 !std::accumulate(
301 first: BFs.begin(), last: BFs.end(), init: true,
302 binary_op: [](const bool Valid,
303 const std::pair<const uint64_t, BinaryFunction> &It) {
304 return Valid && It.second.validateCFG();
305 })) {
306 return createFatalBOLTError(
307 S: Twine("BOLT-ERROR: Invalid CFG detected after pass ") +
308 Twine(Pass->getName()) + Twine("\n"));
309 }
310
311 if (opts::Verbosity > 0)
312 BC.outs() << "BOLT-INFO: Finished pass: " << Pass->getName() << "\n";
313
314 if (!opts::PrintAll && !opts::DumpDotAll && !Pass->printPass())
315 continue;
316
317 const std::string Message = std::string("after ") + Pass->getName();
318
319 for (auto &It : BFs) {
320 BinaryFunction &Function = It.second;
321
322 if (!Pass->shouldPrint(BF: Function))
323 continue;
324
325 Function.print(OS&: BC.outs(), Annotation: Message);
326
327 if (opts::DumpDotAll)
328 Function.dumpGraphForPass(Annotation: PassIdName);
329 }
330 }
331 return Error::success();
332}
333
334Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
335 BinaryFunctionPassManager Manager(BC);
336
337 const DynoStats InitialDynoStats =
338 getDynoStats(Funcs&: BC.getBinaryFunctions(), IsAArch64: BC.isAArch64());
339
340 Manager.registerPass(Pass: std::make_unique<AsmDumpPass>(),
341 Run: opts::AsmDump.getNumOccurrences());
342
343 if (BC.isAArch64()) {
344 Manager.registerPass(Pass: std::make_unique<FixRelaxations>(args&: PrintFixRelaxations));
345
346 Manager.registerPass(
347 Pass: std::make_unique<VeneerElimination>(args&: PrintVeneerElimination));
348 }
349
350 if (BC.isRISCV()) {
351 Manager.registerPass(
352 Pass: std::make_unique<FixRISCVCallsPass>(args&: PrintFixRISCVCalls));
353 }
354
355 // Here we manage dependencies/order manually, since passes are run in the
356 // order they're registered.
357
358 // Run this pass first to use stats for the original functions.
359 Manager.registerPass(Pass: std::make_unique<PrintProgramStats>(args&: NeverPrint));
360
361 if (opts::PrintProfileStats)
362 Manager.registerPass(Pass: std::make_unique<PrintProfileStats>(args&: NeverPrint));
363
364 Manager.registerPass(Pass: std::make_unique<ValidateInternalCalls>(args&: NeverPrint));
365
366 Manager.registerPass(Pass: std::make_unique<ValidateMemRefs>(args&: NeverPrint));
367
368 if (opts::Instrument)
369 Manager.registerPass(Pass: std::make_unique<Instrumentation>(args&: NeverPrint));
370 else if (opts::Hugify)
371 Manager.registerPass(Pass: std::make_unique<HugePage>(args&: NeverPrint));
372
373 Manager.registerPass(Pass: std::make_unique<ShortenInstructions>(args&: NeverPrint));
374
375 Manager.registerPass(Pass: std::make_unique<RemoveNops>(args&: NeverPrint),
376 Run: !opts::KeepNops);
377
378 Manager.registerPass(Pass: std::make_unique<NormalizeCFG>(args&: PrintNormalized));
379
380 if (BC.isX86())
381 Manager.registerPass(Pass: std::make_unique<StripRepRet>(args&: NeverPrint),
382 Run: opts::StripRepRet);
383
384 Manager.registerPass(Pass: std::make_unique<IdenticalCodeFolding>(args&: PrintICF),
385 Run: opts::ICF);
386
387 Manager.registerPass(
388 Pass: std::make_unique<SpecializeMemcpy1>(args&: NeverPrint, args&: opts::SpecializeMemcpy1),
389 Run: !opts::SpecializeMemcpy1.empty());
390
391 Manager.registerPass(Pass: std::make_unique<InlineMemcpy>(args&: NeverPrint),
392 Run: opts::StringOps);
393
394 Manager.registerPass(Pass: std::make_unique<IndirectCallPromotion>(args&: PrintICP));
395
396 Manager.registerPass(
397 Pass: std::make_unique<JTFootprintReduction>(args&: PrintJTFootprintReduction),
398 Run: opts::JTFootprintReductionFlag);
399
400 Manager.registerPass(
401 Pass: std::make_unique<SimplifyRODataLoads>(args&: PrintSimplifyROLoads),
402 Run: opts::SimplifyRODataLoads);
403
404 Manager.registerPass(Pass: std::make_unique<RegReAssign>(args&: PrintRegReAssign),
405 Run: opts::RegReAssign);
406
407 Manager.registerPass(Pass: std::make_unique<Inliner>(args&: PrintInline));
408
409 Manager.registerPass(Pass: std::make_unique<IdenticalCodeFolding>(args&: PrintICF),
410 Run: opts::ICF);
411
412 Manager.registerPass(Pass: std::make_unique<PLTCall>(args&: PrintPLT));
413
414 Manager.registerPass(Pass: std::make_unique<ThreeWayBranch>(),
415 Run: opts::ThreeWayBranchFlag);
416
417 Manager.registerPass(Pass: std::make_unique<ReorderBasicBlocks>(args&: PrintReordered));
418
419 Manager.registerPass(Pass: std::make_unique<EliminateUnreachableBlocks>(args&: PrintUCE),
420 Run: opts::EliminateUnreachable);
421
422 Manager.registerPass(Pass: std::make_unique<SplitFunctions>(args&: PrintSplit));
423
424 Manager.registerPass(Pass: std::make_unique<LoopInversionPass>());
425
426 Manager.registerPass(Pass: std::make_unique<TailDuplication>());
427
428 Manager.registerPass(Pass: std::make_unique<CMOVConversion>(),
429 Run: opts::CMOVConversionFlag);
430
431 // This pass syncs local branches with CFG. If any of the following
432 // passes breaks the sync - they either need to re-run the pass or
433 // fix branches consistency internally.
434 Manager.registerPass(Pass: std::make_unique<FixupBranches>(args&: PrintAfterBranchFixup));
435
436 // This pass should come close to last since it uses the estimated hot
437 // size of a function to determine the order. It should definitely
438 // also happen after any changes to the call graph are made, e.g. inlining.
439 Manager.registerPass(
440 Pass: std::make_unique<ReorderFunctions>(args&: PrintReorderedFunctions));
441
442 // This is the second run of the SplitFunctions pass required by certain
443 // splitting strategies (e.g. cdsplit). Running the SplitFunctions pass again
444 // after ReorderFunctions allows the finalized function order to be utilized
445 // to make more sophisticated splitting decisions, like hot-warm-cold
446 // splitting.
447 Manager.registerPass(Pass: std::make_unique<SplitFunctions>(args&: PrintSplit));
448
449 // Print final dyno stats right while CFG and instruction analysis are intact.
450 Manager.registerPass(
451 Pass: std::make_unique<DynoStatsPrintPass>(
452 args: InitialDynoStats, args: "after all optimizations before SCTC and FOP"),
453 Run: opts::PrintDynoStats || opts::DynoStatsAll);
454
455 // Add the StokeInfo pass, which extract functions for stoke optimization and
456 // get the liveness information for them
457 Manager.registerPass(Pass: std::make_unique<StokeInfo>(args&: PrintStoke), Run: opts::Stoke);
458
459 // This pass introduces conditional jumps into external functions.
460 // Between extending CFG to support this and isolating this pass we chose
461 // the latter. Thus this pass will do double jump removal and unreachable
462 // code elimination if necessary and won't rely on peepholes/UCE for these
463 // optimizations.
464 // More generally this pass should be the last optimization pass that
465 // modifies branches/control flow. This pass is run after function
466 // reordering so that it can tell whether calls are forward/backward
467 // accurately.
468 Manager.registerPass(
469 Pass: std::make_unique<SimplifyConditionalTailCalls>(args&: PrintSCTC),
470 Run: opts::SimplifyConditionalTailCalls);
471
472 Manager.registerPass(Pass: std::make_unique<Peepholes>(args&: PrintPeepholes));
473
474 Manager.registerPass(Pass: std::make_unique<AlignerPass>());
475
476 // Perform reordering on data contained in one or more sections using
477 // memory profiling data.
478 Manager.registerPass(Pass: std::make_unique<ReorderData>());
479
480 if (BC.isAArch64()) {
481 Manager.registerPass(Pass: std::make_unique<ADRRelaxationPass>());
482
483 // Tighten branches according to offset differences between branch and
484 // targets. No extra instructions after this pass, otherwise we may have
485 // relocations out of range and crash during linking.
486 Manager.registerPass(Pass: std::make_unique<LongJmpPass>(args&: PrintLongJmp));
487 }
488
489 // This pass should always run last.*
490 Manager.registerPass(Pass: std::make_unique<FinalizeFunctions>(args&: PrintFinalized));
491
492 // FrameOptimizer has an implicit dependency on FinalizeFunctions.
493 // FrameOptimizer move values around and needs to update CFIs. To do this, it
494 // must read CFI, interpret it and rewrite it, so CFIs need to be correctly
495 // placed according to the final layout.
496 Manager.registerPass(Pass: std::make_unique<FrameOptimizerPass>(args&: PrintFOP));
497
498 Manager.registerPass(Pass: std::make_unique<AllocCombinerPass>(args&: PrintFOP));
499
500 Manager.registerPass(
501 Pass: std::make_unique<RetpolineInsertion>(args&: PrintRetpolineInsertion));
502
503 // Assign each function an output section.
504 Manager.registerPass(Pass: std::make_unique<AssignSections>());
505
506 // Patch original function entries
507 if (BC.HasRelocations)
508 Manager.registerPass(Pass: std::make_unique<PatchEntries>());
509
510 // This pass turns tail calls into jumps which makes them invisible to
511 // function reordering. It's unsafe to use any CFG or instruction analysis
512 // after this point.
513 Manager.registerPass(
514 Pass: std::make_unique<InstructionLowering>(args&: PrintAfterLowering));
515
516 // In non-relocation mode, mark functions that do not fit into their original
517 // space as non-simple if we have to (e.g. for correct debug info update).
518 // NOTE: this pass depends on finalized code.
519 if (!BC.HasRelocations)
520 Manager.registerPass(Pass: std::make_unique<CheckLargeFunctions>(args&: NeverPrint));
521
522 Manager.registerPass(Pass: std::make_unique<LowerAnnotations>(args&: NeverPrint));
523
524 // Check for dirty state of MCSymbols caused by running calculateEmittedSize
525 // in parallel and restore them
526 Manager.registerPass(Pass: std::make_unique<CleanMCState>(args&: NeverPrint));
527
528 return Manager.runPasses();
529}
530
531} // namespace bolt
532} // namespace llvm
533

source code of bolt/lib/Rewrite/BinaryPassManager.cpp