//===- Construction of pass pipelines -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the implementation of the PassBuilder based on our
/// static pass registry as well as related functionality. It also provides
/// helpers to aid in analyzing, debugging, and testing passes and pass
/// pipelines.
///
//===----------------------------------------------------------------------===//
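//
// Illustrative sketch (not part of this file's logic; all names shown are
// standard PassBuilder APIs): an embedder typically consumes the pipelines
// built here roughly as follows, assuming the analysis managers have been
// created elsewhere:
//
//   PassBuilder PB(TM);
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
//   MPM.run(M, MAM);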
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/GlobalsModRef.h"
22#include "llvm/Analysis/InlineAdvisor.h"
23#include "llvm/Analysis/ProfileSummaryInfo.h"
24#include "llvm/Analysis/ScopedNoAliasAA.h"
25#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26#include "llvm/IR/PassManager.h"
27#include "llvm/Passes/OptimizationLevel.h"
28#include "llvm/Passes/PassBuilder.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/PGOOptions.h"
32#include "llvm/Support/VirtualFileSystem.h"
33#include "llvm/Target/TargetMachine.h"
34#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
35#include "llvm/Transforms/Coroutines/CoroCleanup.h"
36#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
37#include "llvm/Transforms/Coroutines/CoroEarly.h"
38#include "llvm/Transforms/Coroutines/CoroElide.h"
39#include "llvm/Transforms/Coroutines/CoroSplit.h"
40#include "llvm/Transforms/HipStdPar/HipStdPar.h"
41#include "llvm/Transforms/IPO/AlwaysInliner.h"
42#include "llvm/Transforms/IPO/Annotation2Metadata.h"
43#include "llvm/Transforms/IPO/ArgumentPromotion.h"
44#include "llvm/Transforms/IPO/Attributor.h"
45#include "llvm/Transforms/IPO/CalledValuePropagation.h"
46#include "llvm/Transforms/IPO/ConstantMerge.h"
47#include "llvm/Transforms/IPO/CrossDSOCFI.h"
48#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
49#include "llvm/Transforms/IPO/ElimAvailExtern.h"
50#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
52#include "llvm/Transforms/IPO/FunctionAttrs.h"
53#include "llvm/Transforms/IPO/GlobalDCE.h"
54#include "llvm/Transforms/IPO/GlobalOpt.h"
55#include "llvm/Transforms/IPO/GlobalSplit.h"
56#include "llvm/Transforms/IPO/HotColdSplitting.h"
57#include "llvm/Transforms/IPO/IROutliner.h"
58#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
59#include "llvm/Transforms/IPO/Inliner.h"
60#include "llvm/Transforms/IPO/LowerTypeTests.h"
61#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
62#include "llvm/Transforms/IPO/MergeFunctions.h"
63#include "llvm/Transforms/IPO/ModuleInliner.h"
64#include "llvm/Transforms/IPO/OpenMPOpt.h"
65#include "llvm/Transforms/IPO/PartialInlining.h"
66#include "llvm/Transforms/IPO/SCCP.h"
67#include "llvm/Transforms/IPO/SampleProfile.h"
68#include "llvm/Transforms/IPO/SampleProfileProbe.h"
69#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
71#include "llvm/Transforms/InstCombine/InstCombine.h"
72#include "llvm/Transforms/Instrumentation/CGProfile.h"
73#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
74#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
75#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76#include "llvm/Transforms/Instrumentation/MemProfiler.h"
77#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
78#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
79#include "llvm/Transforms/Scalar/ADCE.h"
80#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
81#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
82#include "llvm/Transforms/Scalar/BDCE.h"
83#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
84#include "llvm/Transforms/Scalar/ConstraintElimination.h"
85#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
86#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
87#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
88#include "llvm/Transforms/Scalar/DivRemPairs.h"
89#include "llvm/Transforms/Scalar/EarlyCSE.h"
90#include "llvm/Transforms/Scalar/Float2Int.h"
91#include "llvm/Transforms/Scalar/GVN.h"
92#include "llvm/Transforms/Scalar/IndVarSimplify.h"
93#include "llvm/Transforms/Scalar/InferAlignment.h"
94#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
95#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
96#include "llvm/Transforms/Scalar/JumpThreading.h"
97#include "llvm/Transforms/Scalar/LICM.h"
98#include "llvm/Transforms/Scalar/LoopDeletion.h"
99#include "llvm/Transforms/Scalar/LoopDistribute.h"
100#include "llvm/Transforms/Scalar/LoopFlatten.h"
101#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
102#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
103#include "llvm/Transforms/Scalar/LoopInterchange.h"
104#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
105#include "llvm/Transforms/Scalar/LoopPassManager.h"
106#include "llvm/Transforms/Scalar/LoopRotation.h"
107#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
108#include "llvm/Transforms/Scalar/LoopSink.h"
109#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
110#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
111#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
112#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
113#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
114#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
115#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
116#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
117#include "llvm/Transforms/Scalar/NewGVN.h"
118#include "llvm/Transforms/Scalar/Reassociate.h"
119#include "llvm/Transforms/Scalar/SCCP.h"
120#include "llvm/Transforms/Scalar/SROA.h"
121#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
122#include "llvm/Transforms/Scalar/SimplifyCFG.h"
123#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
124#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
125#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
126#include "llvm/Transforms/Utils/AddDiscriminators.h"
127#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
128#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
129#include "llvm/Transforms/Utils/CountVisits.h"
130#include "llvm/Transforms/Utils/InjectTLIMappings.h"
131#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
132#include "llvm/Transforms/Utils/Mem2Reg.h"
133#include "llvm/Transforms/Utils/MoveAutoInit.h"
134#include "llvm/Transforms/Utils/NameAnonGlobals.h"
135#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
136#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
137#include "llvm/Transforms/Vectorize/LoopVectorize.h"
138#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
139#include "llvm/Transforms/Vectorize/VectorCombine.h"
140
141using namespace llvm;
142
143static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
144 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
145 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
146 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
147 "Heuristics-based inliner version"),
148 clEnumValN(InliningAdvisorMode::Development, "development",
149 "Use development mode (runtime-loadable model)"),
150 clEnumValN(InliningAdvisorMode::Release, "release",
151 "Use release mode (AOT-compiled model)")));
152
153static cl::opt<bool> EnableSyntheticCounts(
154 "enable-npm-synthetic-counts", cl::Hidden,
155 cl::desc("Run synthetic function entry count generation "
156 "pass"));
157
158/// Flag to enable inline deferral during PGO.
159static cl::opt<bool>
160 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
161 cl::Hidden,
162 cl::desc("Enable inline deferral during PGO"));
163
164static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
165 cl::init(Val: false), cl::Hidden,
166 cl::desc("Enable module inliner"));
167
168static cl::opt<bool> PerformMandatoryInliningsFirst(
169 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
170 cl::desc("Perform mandatory inlinings module-wide, before performing "
171 "inlining"));
172
173static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
174 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
175 cl::desc("Eagerly invalidate more analyses in default pipelines"));
176
177static cl::opt<bool> EnableMergeFunctions(
178 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
179 cl::desc("Enable function merging as part of the optimization pipeline"));
180
181static cl::opt<bool> EnablePostPGOLoopRotation(
182 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
183 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
184
185static cl::opt<bool> EnableGlobalAnalyses(
186 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
187 cl::desc("Enable inter-procedural analyses"));
188
static cl::opt<bool>
    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                       cl::desc("Run partial inlining pass"));
192
193static cl::opt<bool> ExtraVectorizerPasses(
194 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
195 cl::desc("Run cleanup optimization passes after vectorization"));
196
197static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
198 cl::desc("Run the NewGVN pass"));
199
200static cl::opt<bool> EnableLoopInterchange(
201 "enable-loopinterchange", cl::init(Val: false), cl::Hidden,
202 cl::desc("Enable the experimental LoopInterchange Pass"));
203
204static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
205 cl::init(Val: false), cl::Hidden,
206 cl::desc("Enable Unroll And Jam Pass"));
207
208static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
209 cl::Hidden,
210 cl::desc("Enable the LoopFlatten Pass"));
211
212// Experimentally allow loop header duplication. This should allow for better
213// optimization at Oz, since loop-idiom recognition can then recognize things
214// like memcpy. If this ends up being useful for many targets, we should drop
215// this flag and make a code generation option that can be controlled
216// independent of the opt level and exposed through the frontend.
217static cl::opt<bool> EnableLoopHeaderDuplication(
218 "enable-loop-header-duplication", cl::init(Val: false), cl::Hidden,
219 cl::desc("Enable loop header duplication at any optimization level"));
220
221static cl::opt<bool>
222 EnableDFAJumpThreading("enable-dfa-jump-thread",
223 cl::desc("Enable DFA jump threading"),
224 cl::init(Val: false), cl::Hidden);
225
226// TODO: turn on and remove flag
227static cl::opt<bool> EnablePGOForceFunctionAttrs(
228 "enable-pgo-force-function-attrs",
229 cl::desc("Enable pass to set function attributes based on PGO profiles"),
230 cl::init(Val: false));
231
232static cl::opt<bool>
233 EnableHotColdSplit("hot-cold-split",
234 cl::desc("Enable hot-cold splitting pass"));
235
236static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
237 cl::Hidden,
238 cl::desc("Enable ir outliner pass"));
239
240static cl::opt<bool>
241 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
242 cl::desc("Disable pre-instrumentation inliner"));
243
244static cl::opt<int> PreInlineThreshold(
245 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
246 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
247 "(default = 75)"));
248
249static cl::opt<bool>
250 EnableGVNHoist("enable-gvn-hoist",
251 cl::desc("Enable the GVN hoisting pass (default = off)"));
252
253static cl::opt<bool>
254 EnableGVNSink("enable-gvn-sink",
255 cl::desc("Enable the GVN sinking pass (default = off)"));
256
257static cl::opt<bool> EnableJumpTableToSwitch(
258 "enable-jump-table-to-switch",
259 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
260
// This option is used to simplify testing of SampleFDO optimizations for
// profile loading.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierarchy exists in the profile"));
271
272static cl::opt<bool> EnableOrderFileInstrumentation(
273 "enable-order-file-instrumentation", cl::init(Val: false), cl::Hidden,
274 cl::desc("Enable order file instrumentation (default = off)"));
275
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
280static cl::opt<bool> EnableConstraintElimination(
281 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
285static cl::opt<AttributorRunOption> AttributorRun(
286 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
288 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
289 "enable all attributor runs"),
290 clEnumValN(AttributorRunOption::MODULE, "module",
291 "enable module-wide attributor runs"),
292 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
293 "enable call graph SCC attributor runs"),
294 clEnumValN(AttributorRunOption::NONE, "none",
295 "disable attributor runs")));
296
297static cl::opt<bool> UseLoopVersioningLICM(
298 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
301namespace llvm {
302extern cl::opt<bool> EnableMemProfContextDisambiguation;
303
304extern cl::opt<bool> EnableInferAlignmentPass;
305} // namespace llvm
306
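// Default pipeline tuning knobs. Frontends can override these before they
// construct a PassBuilder; the values below are only the fallback defaults.
// Illustrative sketch (assumes a TargetMachine *TM is available):
//   PipelineTuningOptions PTO;
//   PTO.SLPVectorization = true; // opt in to SLP vectorization
//   PassBuilder PB(TM, PTO);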
PipelineTuningOptions::PipelineTuningOptions() {
  LoopInterleaving = true;
  LoopVectorization = true;
  SLPVectorization = false;
  LoopUnrolling = true;
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  CallGraphProfile = true;
  UnifiedLTO = false;
  MergeFunctions = EnableMergeFunctions;
  InlinerThreshold = -1;
  EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
}
321
322namespace llvm {
323extern cl::opt<unsigned> MaxDevirtIterations;
324} // namespace llvm
325
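// The invoke*EPCallbacks helpers below run, in registration order, every
// callback a client attached to the corresponding extension point. An
// illustrative registration (sketch; MyPeepholePass is a hypothetical pass):
//   PB.registerPeepholeEPCallback(
//       [](FunctionPassManager &FPM, OptimizationLevel Level) {
//         FPM.addPass(MyPeepholePass());
//       });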
void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
                                            OptimizationLevel Level) {
  for (auto &C : PeepholeEPCallbacks)
    C(FPM, Level);
}
331void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
332 LoopPassManager &LPM, OptimizationLevel Level) {
333 for (auto &C : LateLoopOptimizationsEPCallbacks)
334 C(LPM, Level);
335}
336void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
337 OptimizationLevel Level) {
338 for (auto &C : LoopOptimizerEndEPCallbacks)
339 C(LPM, Level);
340}
341void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
342 FunctionPassManager &FPM, OptimizationLevel Level) {
343 for (auto &C : ScalarOptimizerLateEPCallbacks)
344 C(FPM, Level);
345}
346void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
347 OptimizationLevel Level) {
348 for (auto &C : CGSCCOptimizerLateEPCallbacks)
349 C(CGPM, Level);
350}
351void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
352 OptimizationLevel Level) {
353 for (auto &C : VectorizerStartEPCallbacks)
354 C(FPM, Level);
355}
356void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
357 OptimizationLevel Level) {
358 for (auto &C : OptimizerEarlyEPCallbacks)
359 C(MPM, Level);
360}
361void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
362 OptimizationLevel Level) {
363 for (auto &C : OptimizerLastEPCallbacks)
364 C(MPM, Level);
365}
366void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
367 ModulePassManager &MPM, OptimizationLevel Level) {
368 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
369 C(MPM, Level);
370}
371void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
372 ModulePassManager &MPM, OptimizationLevel Level) {
373 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
374 C(MPM, Level);
375}
376void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
377 OptimizationLevel Level) {
378 for (auto &C : PipelineStartEPCallbacks)
379 C(MPM, Level);
380}
381void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
382 ModulePassManager &MPM, OptimizationLevel Level) {
383 for (auto &C : PipelineEarlySimplificationEPCallbacks)
384 C(MPM, Level);
385}
386
387// Helper to add AnnotationRemarksPass.
388static void addAnnotationRemarksPass(ModulePassManager &MPM) {
389 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
390}
391
392// Helper to check if the current compilation phase is preparing for LTO
393static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
394 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
395 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
396}
397
398// TODO: Investigate the cost/benefit of tail call elimination on debugging.
399FunctionPassManager
400PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
401 ThinOrFullLTOPhase Phase) {
402
403 FunctionPassManager FPM;
404
405 if (AreStatisticsEnabled())
406 FPM.addPass(Pass: CountVisitsPass());
407
408 // Form SSA out of local memory accesses after breaking apart aggregates into
409 // scalars.
410 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
411
412 // Catch trivial redundancies
413 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
414
415 // Hoisting of scalars and load expressions.
416 FPM.addPass(
417 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
418 FPM.addPass(Pass: InstCombinePass());
419
420 FPM.addPass(Pass: LibCallsShrinkWrapPass());
421
422 invokePeepholeEPCallbacks(FPM, Level);
423
424 FPM.addPass(
425 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
426
427 // Form canonically associated expression trees, and simplify the trees using
428 // basic mathematical properties. For example, this will form (nearly)
429 // minimal multiplication trees.
430 FPM.addPass(Pass: ReassociatePass());
431
432 // Add the primary loop simplification pipeline.
433 // FIXME: Currently this is split into two loop pass pipelines because we run
434 // some function passes in between them. These can and should be removed
435 // and/or replaced by scheduling the loop pass equivalents in the correct
436 // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest equivalent we have to
  // `InstCombinePass` is `LoopInstSimplify`.
441 LoopPassManager LPM1, LPM2;
442
443 // Simplify the loop body. We do this initially to clean up after other loop
444 // passes run, either when iterating on a loop or on inner loops with
445 // implications on the outer loop.
446 LPM1.addPass(Pass: LoopInstSimplifyPass());
447 LPM1.addPass(Pass: LoopSimplifyCFGPass());
448
449 // Try to remove as much code from the loop header as possible,
450 // to reduce amount of IR that will have to be duplicated. However,
451 // do not perform speculative hoisting the first time as LICM
452 // will destroy metadata that may not need to be destroyed if run
453 // after loop rotation.
454 // TODO: Investigate promotion cap for O1.
455 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
456 /*AllowSpeculation=*/false));
457
458 LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true,
459 isLTOPreLink(Phase)));
460 // TODO: Investigate promotion cap for O1.
461 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
462 /*AllowSpeculation=*/true));
463 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
464 if (EnableLoopFlatten)
465 LPM1.addPass(Pass: LoopFlattenPass());
466
467 LPM2.addPass(Pass: LoopIdiomRecognizePass());
468 LPM2.addPass(Pass: IndVarSimplifyPass());
469
470 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
471
472 LPM2.addPass(Pass: LoopDeletionPass());
473
474 if (EnableLoopInterchange)
475 LPM2.addPass(Pass: LoopInterchangePass());
476
  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full-unroll attributes, so we need to make sure the full unroll
  // pass still pays attention to them.
482 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
483 PGOOpt->Action != PGOOptions::SampleUse)
484 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
485 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
486 PTO.ForgetAllSCEVInLoopUnroll));
487
488 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
489
490 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
491 /*UseMemorySSA=*/true,
492 /*UseBlockFrequencyInfo=*/true));
493 FPM.addPass(
494 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
495 FPM.addPass(Pass: InstCombinePass());
496 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
497 // *All* loop passes must preserve it, in order to be able to use it.
498 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
499 /*UseMemorySSA=*/false,
500 /*UseBlockFrequencyInfo=*/false));
501
502 // Delete small array after loop unroll.
503 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
504
505 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
506 FPM.addPass(Pass: MemCpyOptPass());
507
508 // Sparse conditional constant propagation.
509 // FIXME: It isn't clear why we do this *after* loop passes rather than
510 // before...
511 FPM.addPass(Pass: SCCPPass());
512
513 // Delete dead bit computations (instcombine runs after to fold away the dead
514 // computations, and then ADCE will run later to exploit any new DCE
515 // opportunities that creates).
516 FPM.addPass(Pass: BDCEPass());
517
518 // Run instcombine after redundancy and dead bit elimination to exploit
519 // opportunities opened up by them.
520 FPM.addPass(Pass: InstCombinePass());
521 invokePeepholeEPCallbacks(FPM, Level);
522
523 FPM.addPass(Pass: CoroElidePass());
524
525 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
526
527 // Finally, do an expensive DCE pass to catch all the dead code exposed by
528 // the simplifications and basic cleanup after all the simplifications.
529 // TODO: Investigate if this is too expensive.
530 FPM.addPass(Pass: ADCEPass());
531 FPM.addPass(
532 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
533 FPM.addPass(Pass: InstCombinePass());
534 invokePeepholeEPCallbacks(FPM, Level);
535
536 return FPM;
537}
538
539FunctionPassManager
540PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
541 ThinOrFullLTOPhase Phase) {
542 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
543
544 // The O1 pipeline has a separate pipeline creation function to simplify
545 // construction readability.
546 if (Level.getSpeedupLevel() == 1)
547 return buildO1FunctionSimplificationPipeline(Level, Phase);
548
549 FunctionPassManager FPM;
550
551 if (AreStatisticsEnabled())
552 FPM.addPass(Pass: CountVisitsPass());
553
554 // Form SSA out of local memory accesses after breaking apart aggregates into
555 // scalars.
556 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
557
558 // Catch trivial redundancies
559 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
560 if (EnableKnowledgeRetention)
561 FPM.addPass(Pass: AssumeSimplifyPass());
562
563 // Hoisting of scalars and load expressions.
564 if (EnableGVNHoist)
565 FPM.addPass(Pass: GVNHoistPass());
566
567 // Global value numbering based sinking.
568 if (EnableGVNSink) {
569 FPM.addPass(Pass: GVNSinkPass());
570 FPM.addPass(
571 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
572 }
573
574 // Speculative execution if the target has divergent branches; otherwise nop.
575 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
576
577 // Optimize based on known information about branches, and cleanup afterward.
578 FPM.addPass(Pass: JumpThreadingPass());
579 FPM.addPass(Pass: CorrelatedValuePropagationPass());
580
581 // Jump table to switch conversion.
582 if (EnableJumpTableToSwitch)
583 FPM.addPass(Pass: JumpTableToSwitchPass());
584
585 FPM.addPass(
586 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
587 FPM.addPass(Pass: InstCombinePass());
588 FPM.addPass(Pass: AggressiveInstCombinePass());
589
590 if (!Level.isOptimizingForSize())
591 FPM.addPass(Pass: LibCallsShrinkWrapPass());
592
593 invokePeepholeEPCallbacks(FPM, Level);
594
595 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
596 // using the size value profile. Don't perform this when optimizing for size.
597 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
598 !Level.isOptimizingForSize())
599 FPM.addPass(Pass: PGOMemOPSizeOpt());
600
601 FPM.addPass(Pass: TailCallElimPass());
602 FPM.addPass(
603 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
604
605 // Form canonically associated expression trees, and simplify the trees using
606 // basic mathematical properties. For example, this will form (nearly)
607 // minimal multiplication trees.
608 FPM.addPass(Pass: ReassociatePass());
609
610 if (EnableConstraintElimination)
611 FPM.addPass(Pass: ConstraintEliminationPass());
612
613 // Add the primary loop simplification pipeline.
614 // FIXME: Currently this is split into two loop pass pipelines because we run
615 // some function passes in between them. These can and should be removed
616 // and/or replaced by scheduling the loop pass equivalents in the correct
617 // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest equivalent we have to
  // `InstCombinePass` is `LoopInstSimplify`.
622 LoopPassManager LPM1, LPM2;
623
624 // Simplify the loop body. We do this initially to clean up after other loop
625 // passes run, either when iterating on a loop or on inner loops with
626 // implications on the outer loop.
627 LPM1.addPass(Pass: LoopInstSimplifyPass());
628 LPM1.addPass(Pass: LoopSimplifyCFGPass());
629
630 // Try to remove as much code from the loop header as possible,
631 // to reduce amount of IR that will have to be duplicated. However,
632 // do not perform speculative hoisting the first time as LICM
633 // will destroy metadata that may not need to be destroyed if run
634 // after loop rotation.
635 // TODO: Investigate promotion cap for O1.
636 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
637 /*AllowSpeculation=*/false));
638
639 // Disable header duplication in loop rotation at -Oz.
640 LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
641 Level != OptimizationLevel::Oz,
642 isLTOPreLink(Phase)));
643 // TODO: Investigate promotion cap for O1.
644 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
645 /*AllowSpeculation=*/true));
646 LPM1.addPass(
647 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
648 if (EnableLoopFlatten)
649 LPM1.addPass(Pass: LoopFlattenPass());
650
651 LPM2.addPass(Pass: LoopIdiomRecognizePass());
652 LPM2.addPass(Pass: IndVarSimplifyPass());
653
654 {
655 ExtraSimpleLoopUnswitchPassManager ExtraPasses;
656 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
657 OptimizationLevel::O3));
658 LPM2.addPass(Pass: std::move(ExtraPasses));
659 }
660
661 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
662
663 LPM2.addPass(Pass: LoopDeletionPass());
664
665 if (EnableLoopInterchange)
666 LPM2.addPass(Pass: LoopInterchangePass());
667
  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full-unroll attributes, so we need to make sure the full unroll
  // pass still pays attention to them.
673 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
674 PGOOpt->Action != PGOOptions::SampleUse)
675 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
676 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
677 PTO.ForgetAllSCEVInLoopUnroll));
678
679 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
680
681 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
682 /*UseMemorySSA=*/true,
683 /*UseBlockFrequencyInfo=*/true));
684 FPM.addPass(
685 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
686 FPM.addPass(Pass: InstCombinePass());
687 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
688 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
689 // *All* loop passes must preserve it, in order to be able to use it.
690 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
691 /*UseMemorySSA=*/false,
692 /*UseBlockFrequencyInfo=*/false));
693
694 // Delete small array after loop unroll.
695 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
696
697 // Try vectorization/scalarization transforms that are both improvements
698 // themselves and can allow further folds with GVN and InstCombine.
699 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
700
701 // Eliminate redundancies.
702 FPM.addPass(Pass: MergedLoadStoreMotionPass());
703 if (RunNewGVN)
704 FPM.addPass(Pass: NewGVNPass());
705 else
706 FPM.addPass(Pass: GVNPass());
707
708 // Sparse conditional constant propagation.
709 // FIXME: It isn't clear why we do this *after* loop passes rather than
710 // before...
711 FPM.addPass(Pass: SCCPPass());
712
713 // Delete dead bit computations (instcombine runs after to fold away the dead
714 // computations, and then ADCE will run later to exploit any new DCE
715 // opportunities that creates).
716 FPM.addPass(Pass: BDCEPass());
717
718 // Run instcombine after redundancy and dead bit elimination to exploit
719 // opportunities opened up by them.
720 FPM.addPass(Pass: InstCombinePass());
721 invokePeepholeEPCallbacks(FPM, Level);
722
723 // Re-consider control flow based optimizations after redundancy elimination,
724 // redo DCE, etc.
725 if (EnableDFAJumpThreading)
726 FPM.addPass(Pass: DFAJumpThreadingPass());
727
728 FPM.addPass(Pass: JumpThreadingPass());
729 FPM.addPass(Pass: CorrelatedValuePropagationPass());
730
731 // Finally, do an expensive DCE pass to catch all the dead code exposed by
732 // the simplifications and basic cleanup after all the simplifications.
733 // TODO: Investigate if this is too expensive.
734 FPM.addPass(Pass: ADCEPass());
735
736 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
737 FPM.addPass(Pass: MemCpyOptPass());
738
739 FPM.addPass(Pass: DSEPass());
740 FPM.addPass(Pass: MoveAutoInitPass());
741
742 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
743 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
744 /*AllowSpeculation=*/true),
745 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
746
747 FPM.addPass(Pass: CoroElidePass());
748
749 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
750
751 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
752 .convertSwitchRangeToICmp(B: true)
753 .hoistCommonInsts(B: true)
754 .sinkCommonInsts(B: true)));
755 FPM.addPass(Pass: InstCombinePass());
756 invokePeepholeEPCallbacks(FPM, Level);
757
758 return FPM;
759}
760
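// Passes that every (Thin)LTO pre-link pipeline must run at the end: aliases
// are canonicalized and anonymous globals are given names so they can be
// referenced and promoted across module boundaries at link time.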
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
765
766void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
767 OptimizationLevel Level,
768 ThinOrFullLTOPhase LTOPhase) {
769 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
770 if (DisablePreInliner)
771 return;
772 InlineParams IP;
773
774 IP.DefaultThreshold = PreInlineThreshold;
775
  // FIXME: The hint threshold has the same value used by the regular inliner
  // when not optimizing for size. This should probably be lowered after
  // performance testing.
  // FIXME: this comment is cargo-culted from the old pass manager; revisit.
780 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
781 ModuleInlinerWrapperPass MIWP(
782 IP, /* MandatoryFirst */ true,
783 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
784 CGSCCPassManager &CGPipeline = MIWP.getPM();
785
786 FunctionPassManager FPM;
787 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
788 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
789 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
790 B: true))); // Merge & remove basic blocks.
791 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
792 invokePeepholeEPCallbacks(FPM, Level);
793
794 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
795 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
796
797 MPM.addPass(Pass: std::move(MIWP));
798
799 // Delete anything that is now dead to make sure that we don't instrument
800 // dead code. Instrumentation can end up keeping dead code around and
801 // dramatically increase code size.
802 MPM.addPass(Pass: GlobalDCEPass());
803}
804
805void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
806 OptimizationLevel Level, bool RunProfileGen,
807 bool IsCS, bool AtomicCounterUpdate,
808 std::string ProfileFile,
809 std::string ProfileRemappingFile,
810 IntrusiveRefCntPtr<vfs::FileSystem> FS) {
811 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
812
813 if (!RunProfileGen) {
814 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
815 MPM.addPass(
816 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
817 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
818 // RequireAnalysisPass for PSI before subsequent non-module passes.
819 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
820 return;
821 }
822
823 // Perform PGO instrumentation.
824 MPM.addPass(Pass: PGOInstrumentationGen(IsCS));
825
826 if (EnablePostPGOLoopRotation) {
827 // Disable header duplication in loop rotation at -Oz.
828 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
829 Pass: createFunctionToLoopPassAdaptor(
830 Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
831 Level != OptimizationLevel::Oz),
832 /*UseMemorySSA=*/false,
833 /*UseBlockFrequencyInfo=*/false),
834 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
835 }
836
837 // Add the profile lowering pass.
838 InstrProfOptions Options;
839 if (!ProfileFile.empty())
840 Options.InstrProfileOutput = ProfileFile;
841 // Do counter promotion at Level greater than O0.
842 Options.DoCounterPromotion = true;
843 Options.UseBFIInPromotion = IsCS;
844 Options.Atomic = AtomicCounterUpdate;
845 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
846}
847
848void PassBuilder::addPGOInstrPassesForO0(
849 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
850 bool AtomicCounterUpdate, std::string ProfileFile,
851 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
852 if (!RunProfileGen) {
853 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854 MPM.addPass(
855 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857 // RequireAnalysisPass for PSI before subsequent non-module passes.
858 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
859 return;
860 }
861
862 // Perform PGO instrumentation.
863 MPM.addPass(Pass: PGOInstrumentationGen(IsCS));
864 // Add the profile lowering pass.
865 InstrProfOptions Options;
866 if (!ProfileFile.empty())
867 Options.InstrProfileOutput = ProfileFile;
868 // Do not do counter promotion at O0.
869 Options.DoCounterPromotion = false;
870 Options.UseBFIInPromotion = IsCS;
871 Options.Atomic = AtomicCounterUpdate;
872 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
873}
874
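// Map the -O speed and size levels onto the standard inliner cost thresholds.
// When the user sets PTO.InlinerThreshold explicitly, buildInlinerPipeline
// uses that value instead of this mapping.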
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
878
879ModuleInlinerWrapperPass
880PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
881 ThinOrFullLTOPhase Phase) {
882 InlineParams IP;
883 if (PTO.InlinerThreshold == -1)
884 IP = getInlineParamsFromOptLevel(Level);
885 else
886 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
  // For PreLinkThinLTO + SamplePGO, set the hot-caller threshold to 0 to
  // disable hot callsite inlining (as much as possible [1]) because it makes
889 // profile annotation in the backend inaccurate.
890 //
891 // [1] Note the cost of a function could be below zero due to erased
892 // prologue / epilogue.
893 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
894 PGOOpt->Action == PGOOptions::SampleUse)
895 IP.HotCallSiteThreshold = 0;
896
897 if (PGOOpt)
898 IP.EnableDeferral = EnablePGOInlineDeferral;
899
900 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
901 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
902 UseInlineAdvisor, MaxDevirtIterations);
903
904 // Require the GlobalsAA analysis for the module so we can query it within
905 // the CGSCC pipeline.
906 if (EnableGlobalAnalyses) {
907 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
908 // Invalidate AAManager so it can be recreated and pick up the newly
909 // available GlobalsAA.
910 MIWP.addModulePass(
911 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
912 }
913
914 // Require the ProfileSummaryAnalysis for the module so we can query it within
915 // the inliner pass.
916 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
917
918 // Now begin the main postorder CGSCC pipeline.
  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  // manager and in trying to emulate its precise behavior. Much of this
  // doesn't make a lot of sense and we should revisit the core CGSCC structure.
922 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
923
924 // Note: historically, the PruneEH pass was run first to deduce nounwind and
925 // generally clean up exception handling overhead. It isn't clear this is
926 // valuable as the inliner doesn't currently care whether it is inlining an
927 // invoke or a call.
928
929 if (AttributorRun & AttributorRunOption::CGSCC)
930 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
931
932 // Deduce function attributes. We do another run of this after the function
933 // simplification pipeline, so this only needs to run when it could affect the
934 // function simplification pipeline, which is only the case with recursive
935 // functions.
936 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
937
938 // When at O3 add argument promotion to the pass pipeline.
939 // FIXME: It isn't at all clear why this should be limited to O3.
940 if (Level == OptimizationLevel::O3)
941 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
942
943 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
944 // there are no OpenMP runtime calls present in the module.
945 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
946 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass());
947
948 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
949
950 // Add the core function simplification pipeline nested inside the
951 // CGSCC walk.
952 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
953 Pass: buildFunctionSimplificationPipeline(Level, Phase),
954 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
955
956 // Finally, deduce any function attributes based on the fully simplified
957 // function.
958 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
959
960 // Mark that the function is fully simplified and that it shouldn't be
961 // simplified again if we somehow revisit it due to CGSCC mutations unless
962 // it's been modified since.
963 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
964 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
965
966 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
967
968 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
969 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
970 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
971
972 return MIWP;
973}
974
975ModulePassManager
976PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
977 ThinOrFullLTOPhase Phase) {
978 ModulePassManager MPM;
979
980 InlineParams IP = getInlineParamsFromOptLevel(Level);
  // For PreLinkThinLTO + SamplePGO, set the hot-caller threshold to 0 to
  // disable hot callsite inlining (as much as possible [1]) because it makes
983 // profile annotation in the backend inaccurate.
984 //
985 // [1] Note the cost of a function could be below zero due to erased
986 // prologue / epilogue.
987 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
988 PGOOpt->Action == PGOOptions::SampleUse)
989 IP.HotCallSiteThreshold = 0;
990
991 if (PGOOpt)
992 IP.EnableDeferral = EnablePGOInlineDeferral;
993
  // The inline deferral logic is used to avoid losing some inlining
  // opportunities in the future. It is helpful in the SCC inliner, where
  // inlining is processed in bottom-up order. In the module inliner, the
  // inlining order is priority-based by default, so inline deferral is
  // unnecessary there and we disable it.
1000 IP.EnableDeferral = false;
1001
1002 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1003
1004 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1005 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1006 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1007
1008 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1009 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1010
1011 return MPM;
1012}
1013
1014ModulePassManager
1015PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1016 ThinOrFullLTOPhase Phase) {
1017 assert(Level != OptimizationLevel::O0 &&
1018 "Should not be used for O0 pipeline");
1019
1020 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1021 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1022
1023 ModulePassManager MPM;
1024
1025 // Place pseudo probe instrumentation as the first pass of the pipeline to
1026 // minimize the impact of optimization changes.
1027 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1028 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1029 MPM.addPass(Pass: SampleProfileProbePass(TM));
1030
1031 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1032
  // In ThinLTO mode, when a flattened profile is used, all the available
  // profile information will be annotated in the PreLink phase, so there is
  // no need to load the profile again in PostLink.
1036 bool LoadSampleProfile =
1037 HasSampleProfile &&
1038 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1039
1040 // During the ThinLTO backend phase we perform early indirect call promotion
1041 // here, before globalopt. Otherwise imported available_externally functions
1042 // look unreferenced and are removed. If we are going to load the sample
1043 // profile then defer until later.
1044 // TODO: See if we can move later and consolidate with the location where
1045 // we perform ICP when we are loading a sample profile.
1046 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1047 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1048 // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
1052 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1053 // provide the sample profile file.
1054 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1055 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1056
  // Create an early function pass manager to clean up the output of the
1058 // frontend. Not necessary with LTO post link pipelines since the pre link
1059 // pipeline already cleaned up the frontend output.
1060 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1061 // Do basic inference of function attributes from known properties of system
1062 // libraries and other oracles.
1063 MPM.addPass(Pass: InferFunctionAttrsPass());
1064 MPM.addPass(Pass: CoroEarlyPass());
1065
1066 FunctionPassManager EarlyFPM;
1067 // Lower llvm.expect to metadata before attempting transforms.
1068 // Compare/branch metadata may alter the behavior of passes like
1069 // SimplifyCFG.
1070 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1071 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1072 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1073 EarlyFPM.addPass(Pass: EarlyCSEPass());
1074 if (Level == OptimizationLevel::O3)
1075 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1076 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1077 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1078 }
1079
1080 if (LoadSampleProfile) {
1081 // Annotate sample profile right after early FPM to ensure freshness of
1082 // the debug info.
1083 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1084 PGOOpt->ProfileRemappingFile, Phase));
1085 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1086 // RequireAnalysisPass for PSI before subsequent non-module passes.
1087 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1088 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1089 // for the profile annotation to be accurate in the LTO backend.
1090 if (!isLTOPreLink(Phase))
1091 // We perform early indirect call promotion here, before globalopt.
1092 // This is important for the ThinLTO backend phase because otherwise
1093 // imported available_externally functions look unreferenced and are
1094 // removed.
1095 MPM.addPass(
1096 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1097 }
1098
1099 // Try to perform OpenMP specific optimizations on the module. This is a
1100 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1101 MPM.addPass(Pass: OpenMPOptPass());
1102
1103 if (AttributorRun & AttributorRunOption::MODULE)
1104 MPM.addPass(Pass: AttributorPass());
1105
1106 // Lower type metadata and the type.test intrinsic in the ThinLTO
1107 // post link pipeline after ICP. This is to enable usage of the type
1108 // tests in ICP sequences.
1109 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1110 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1111
1112 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
1113
1114 // Interprocedural constant propagation now that basic cleanup has occurred
1115 // and prior to optimizing globals.
1116 // FIXME: This position in the pipeline hasn't been carefully considered in
1117 // years, it should be re-analyzed.
1118 MPM.addPass(Pass: IPSCCPPass(
1119 IPSCCPOptions(/*AllowFuncSpec=*/
1120 Level != OptimizationLevel::Os &&
1121 Level != OptimizationLevel::Oz &&
1122 !isLTOPreLink(Phase))));
1123
1124 // Attach metadata to indirect call sites indicating the set of functions
1125 // they may target at run-time. This should follow IPSCCP.
1126 MPM.addPass(Pass: CalledValuePropagationPass());
1127
1128 // Optimize globals to try and fold them into constants.
1129 MPM.addPass(Pass: GlobalOptPass());
1130
1131 // Create a small function pass pipeline to cleanup after all the global
1132 // optimizations.
1133 FunctionPassManager GlobalCleanupPM;
  // FIXME: Should this instead be a run of SROA?
1135 GlobalCleanupPM.addPass(Pass: PromotePass());
1136 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1137 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1138 GlobalCleanupPM.addPass(
1139 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1140 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1141 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1142
1143 // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
1144 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1145 (PGOOpt->Action == PGOOptions::IRInstr ||
1146 PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
1147 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1148
1149 // Add all the requested passes for instrumentation PGO, if requested.
1150 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1151 (PGOOpt->Action == PGOOptions::IRInstr ||
1152 PGOOpt->Action == PGOOptions::IRUse)) {
1153 addPGOInstrPasses(MPM, Level,
1154 /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
1155 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1156 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1157 FS: PGOOpt->FS);
1158 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1159 }
1160 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1161 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1162 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1163
1164 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1165 !PGOOpt->MemoryProfile.empty())
1166 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1167
1168 // Synthesize function entry counts for non-PGO compilation.
1169 if (EnableSyntheticCounts && !PGOOpt)
1170 MPM.addPass(Pass: SyntheticCountsPropagation());
1171
1172 if (EnablePGOForceFunctionAttrs && PGOOpt)
1173 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1174
1175 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1176
1177 if (EnableModuleInliner)
1178 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1179 else
1180 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1181
1182 // Remove any dead arguments exposed by cleanups, constant folding globals,
1183 // and argument promotion.
1184 MPM.addPass(Pass: DeadArgumentEliminationPass());
1185
1186 MPM.addPass(Pass: CoroCleanupPass());
1187
1188 // Optimize globals now that functions are fully simplified.
1189 MPM.addPass(Pass: GlobalOptPass());
1190 MPM.addPass(Pass: GlobalDCEPass());
1191
1192 return MPM;
1193}
1194
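// Populate FPM with the vectorization passes (loop vectorization, unrolling,
// SLP vectorization, VectorCombine) plus the cleanup they need. Used by both
// the regular optimization pipeline (IsFullLTO = false) and the LTO
// optimization pipeline (IsFullLTO = true); the two differ mainly in where
// unrolling and loop-load elimination are scheduled.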
/// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level,
                                  FunctionPassManager &FPM, bool IsFullLTO) {
  FPM.addPass(LoopVectorizePass(
      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1200
1201 if (EnableInferAlignmentPass)
1202 FPM.addPass(Pass: InferAlignmentPass());
1203 if (IsFullLTO) {
1204 // The vectorizer may have significantly shortened a loop body; unroll
1205 // again. Unroll small loops to hide loop backedge latency and saturate any
1206 // parallel execution resources of an out-of-order processor. We also then
1207 // need to clean up redundancies and loop invariant code.
1208 // FIXME: It would be really good to use a loop-integrated instruction
1209 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1210 // across the loop nests.
1211 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1212 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1213 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1214 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1215 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1216 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1217 PTO.ForgetAllSCEVInLoopUnroll)));
1218 FPM.addPass(Pass: WarnMissedTransformationsPass());
1219 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1220 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1221 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
    // NOTE: We are very late in the pipeline, and we don't have any LICM
    // or SimplifyCFG passes scheduled after us that would clean up
    // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1225 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1226 }
1227
1228 if (!IsFullLTO) {
1229 // Eliminate loads by forwarding stores from the previous iteration to loads
1230 // of the current iteration.
1231 FPM.addPass(Pass: LoopLoadEliminationPass());
1232 }
1233 // Cleanup after the loop optimization passes.
1234 FPM.addPass(Pass: InstCombinePass());
1235
1236 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1237 ExtraVectorPassManager ExtraPasses;
1238 // At higher optimization levels, try to clean up any runtime overlap and
1239 // alignment checks inserted by the vectorizer. We want to track correlated
1240 // runtime checks for two inner loops in the same outer loop, fold any
1241 // common computations, hoist loop-invariant aspects out of any outer loop,
1242 // and unswitch the runtime checks if possible. Once hoisted, we may have
1243 // dead (or speculatable) control flows or more combining opportunities.
1244 ExtraPasses.addPass(Pass: EarlyCSEPass());
1245 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1246 ExtraPasses.addPass(Pass: InstCombinePass());
1247 LoopPassManager LPM;
1248 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1249 /*AllowSpeculation=*/true));
1250 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1251 OptimizationLevel::O3));
1252 ExtraPasses.addPass(
1253 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true,
1254 /*UseBlockFrequencyInfo=*/true));
1255 ExtraPasses.addPass(
1256 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1257 ExtraPasses.addPass(Pass: InstCombinePass());
1258 FPM.addPass(Pass: std::move(ExtraPasses));
1259 }
1260
  // Now that we've formed fast-to-execute loop structures, we do further
  // optimizations. These are run afterward because they might otherwise block
  // the complex analyses and transforms that loop vectorization needs.
1264
1265 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1266 // GVN, loop transforms, and others have already run, so it's now better to
1267 // convert to more optimized IR using more aggressive simplify CFG options.
1268 // The extra sinking transform can create larger basic blocks, so do this
1269 // before SLP vectorization.
1270 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1271 .forwardSwitchCondToPhi(B: true)
1272 .convertSwitchRangeToICmp(B: true)
1273 .convertSwitchToLookupTable(B: true)
1274 .needCanonicalLoops(B: false)
1275 .hoistCommonInsts(B: true)
1276 .sinkCommonInsts(B: true)));
1277
1278 if (IsFullLTO) {
1279 FPM.addPass(Pass: SCCPPass());
1280 FPM.addPass(Pass: InstCombinePass());
1281 FPM.addPass(Pass: BDCEPass());
1282 }
1283
1284 // Optimize parallel scalar instruction chains into SIMD instructions.
1285 if (PTO.SLPVectorization) {
1286 FPM.addPass(Pass: SLPVectorizerPass());
1287 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1288 FPM.addPass(Pass: EarlyCSEPass());
1289 }
1290 }
1291 // Enhance/cleanup vector code.
1292 FPM.addPass(Pass: VectorCombinePass());
1293
1294 if (!IsFullLTO) {
1295 FPM.addPass(Pass: InstCombinePass());
1296 // Unroll small loops to hide loop backedge latency and saturate any
1297 // parallel execution resources of an out-of-order processor. We also then
1298 // need to clean up redundancies and loop invariant code.
1299 // FIXME: It would be really good to use a loop-integrated instruction
1300 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1301 // across the loop nests.
1302 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1303 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1304 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1305 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1306 }
1307 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1308 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1309 PTO.ForgetAllSCEVInLoopUnroll)));
1310 FPM.addPass(Pass: WarnMissedTransformationsPass());
1311 // Now that we are done with loop unrolling, whether by the LoopVectorizer
1312 // or the LoopUnroll passes, some variable-offset GEPs into allocas could
1313 // have become constant-offset, thus enabling SROA and alloca promotion. Do so.
1314 // NOTE: we are very late in the pipeline, and we don't have any LICM
1315 // or SimplifyCFG passes scheduled after us that would clean up
1316 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1317 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1318 }
1319
1320 if (EnableInferAlignmentPass)
1321 FPM.addPass(Pass: InferAlignmentPass());
1322 FPM.addPass(Pass: InstCombinePass());
1323
1324 // This is needed for two reasons:
1325 // 1. It works around problems that instcombine introduces, such as sinking
1326 // expensive FP divides into loops containing multiplications using the
1327 // divide result.
1328 // 2. It helps to clean up some loop-invariant code created by the loop
1329 // unroll pass when IsFullLTO=false.
1330 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1331 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1332 /*AllowSpeculation=*/true),
1333 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1334
1335 // Now that we've vectorized and unrolled loops, we may have more refined
1336 // alignment information, try to re-derive it here.
1337 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1338}
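// addVectorPasses is shared between the standard per-module optimization
// pipeline (invoked with IsFullLTO=false from buildModuleOptimizationPipeline)
// and the full LTO post-link pipeline (invoked with IsFullLTO=true from
// buildLTODefaultPipeline); the IsFullLTO checks above select between the
// slightly different cleanup sequences the two pipelines need.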
1339
1340ModulePassManager
1341PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1342 ThinOrFullLTOPhase LTOPhase) {
1343 const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1344 ModulePassManager MPM;
1345
1346 // Run partial inlining pass to partially inline functions that have
1347 // large bodies.
1348 if (RunPartialInlining)
1349 MPM.addPass(Pass: PartialInlinerPass());
1350
1351 // Remove available-externally function and global definitions since we aren't compiling
1352 // an object file for later LTO. For LTO we want to preserve these so they
1353 // are eligible for inlining at link-time. Note if they are unreferenced they
1354 // will be removed by GlobalDCE later, so this only impacts referenced
1355 // available externally globals. Eventually they will be suppressed during
1356 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1357 // may make globals referenced by available external functions dead and saves
1358 // running remaining passes on the eliminated functions. These should be
1359 // preserved during prelinking for link-time inlining decisions.
1360 if (!LTOPreLink)
1361 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1362
1363 if (EnableOrderFileInstrumentation)
1364 MPM.addPass(Pass: InstrOrderFilePass());
1365
1366 // Do RPO function attribute inference across the module to forward-propagate
1367 // attributes where applicable.
1368 // FIXME: Is this really an optimization rather than a canonicalization?
1369 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1370
1371 // Do a post-inline PGO instrumentation and use pass. This is a context-
1372 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase, as
1373 // cross-module inlining has not been done yet. The context-sensitive
1374 // instrumentation runs after all the inlining is done.
1375 if (!LTOPreLink && PGOOpt) {
1376 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1377 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1378 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1379 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1380 FS: PGOOpt->FS);
1381 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1382 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1383 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1384 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1385 FS: PGOOpt->FS);
1386 }
1387
1388 // Re-compute GlobalsAA here prior to function passes. This is particularly
1389 // useful as the above will have inlined, DCE'ed, and function-attr
1390 // propagated everything. We should at this point have a reasonably minimal
1391 // and richly annotated call graph. By computing aliasing and mod/ref
1392 // information for all local globals here, the late loop passes and notably
1393 // the vectorizer will be able to use them to help recognize vectorizable
1394 // memory operations.
1395 if (EnableGlobalAnalyses)
1396 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1397
1398 invokeOptimizerEarlyEPCallbacks(MPM, Level);
1399
1400 FunctionPassManager OptimizePM;
1401 // Schedule LoopVersioningLICM once inlining is over, because after that we
1402 // may see more accurate aliasing. The reason to run this late is that
1403 // versioning too early may prevent further inlining due to the increase in
1404 // code size. Other optimizations that run later can benefit from the
1405 // no-alias assumption in the cloned loop.
1406 if (UseLoopVersioningLICM) {
1407 OptimizePM.addPass(
1408 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1409 // LoopVersioningLICM pass might increase new LICM opportunities.
1410 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1411 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1412 /*AllowSpeculation=*/true),
1413 /*UseMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
1414 }
1415
1416 OptimizePM.addPass(Pass: Float2IntPass());
1417 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1418
1419 if (EnableMatrix) {
1420 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1421 OptimizePM.addPass(Pass: EarlyCSEPass());
1422 }
1423
1424 // The CHR pass should only be applied when profile information is present;
1425 // the profile summary information is checked inside CHR itself.
1426 if (EnableCHR && Level == OptimizationLevel::O3)
1427 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1428
1429 // FIXME: We need to run some loop optimizations to re-rotate loops after
1430 // simplifycfg and others undo their rotation.
1431
1432 // Optimize the loop execution. These passes operate on entire loop nests
1433 // rather than on each loop in an inside-out manner, and so they are actually
1434 // function passes.
1435
1436 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1437
1438 LoopPassManager LPM;
1439 // First rotate loops that may have been un-rotated by prior passes.
1440 // Disable header duplication at -Oz.
1441 LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
1442 Level != OptimizationLevel::Oz,
1443 LTOPreLink));
1444 // Some loops may have become dead by now. Try to delete them.
1445 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1446 // this may need to be revisited once we run GVN before loop deletion
1447 // in the simplification pipeline.
1448 LPM.addPass(Pass: LoopDeletionPass());
1449 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1450 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1451
1452 // Distribute loops to allow partial vectorization, i.e. isolate dependences
1453 // into a separate loop that would otherwise inhibit vectorization. This is
1454 // currently only performed for loops marked with the metadata
1455 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1456 OptimizePM.addPass(Pass: LoopDistributePass());
1457
1458 // Populates the VFABI attribute with the scalar-to-vector mappings
1459 // from the TargetLibraryInfo.
1460 OptimizePM.addPass(Pass: InjectTLIMappings());
1461
1462 addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false);
1463
1464 // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
1465 // canonicalization pass that enables other optimizations. As a result,
1466 // LoopSink needs to be a very late IR pass to avoid undoing the LICM
1467 // results too early.
1468 OptimizePM.addPass(Pass: LoopSinkPass());
1469
1470 // And finally clean up LCSSA form before generating code.
1471 OptimizePM.addPass(Pass: InstSimplifyPass());
1472
1473 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1474 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1475 // flattening of blocks.
1476 OptimizePM.addPass(Pass: DivRemPairsPass());
1477
1478 // Try to annotate calls that were created during optimization.
1479 OptimizePM.addPass(Pass: TailCallElimPass());
1480
1481 // LoopSink (and other loop passes since the last simplifyCFG) might have
1482 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1483 OptimizePM.addPass(
1484 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1485
1486 // Add the core optimizing pipeline.
1487 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1488 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1489
1490 invokeOptimizerLastEPCallbacks(MPM, Level);
1491
1492 // Split out cold code. Splitting is done late to avoid hiding context from
1493 // other optimizations and inadvertently regressing performance. The tradeoff
1494 // is that this has a higher code size cost than splitting early.
1495 if (EnableHotColdSplit && !LTOPreLink)
1496 MPM.addPass(Pass: HotColdSplittingPass());
1497
1498 // Search the code for similar regions. If enough similar regions can be
1499 // found where extracting them into their own function will decrease the
1500 // size of the program, we extract the regions and deduplicate the
1501 // structurally similar ones.
1502 if (EnableIROutliner)
1503 MPM.addPass(Pass: IROutlinerPass());
1504
1505 // Merge functions if requested.
1506 if (PTO.MergeFunctions)
1507 MPM.addPass(Pass: MergeFunctionsPass());
1508
1509 // Now we need to do some global optimization transforms.
1510 // FIXME: It would seem like these should come first in the optimization
1511 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1512 // ordering here.
1513 MPM.addPass(Pass: GlobalDCEPass());
1514 MPM.addPass(Pass: ConstantMergePass());
1515
1516 if (PTO.CallGraphProfile && !LTOPreLink)
1517 MPM.addPass(Pass: CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1518 LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1519
1520 // TODO: The relative lookup table converter pass caused an issue when full
1521 // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
1522 // Until the issue is fixed, disable this pass during the pre-linking phase.
1523 if (!LTOPreLink)
1524 MPM.addPass(Pass: RelLookupTableConverterPass());
1525
1526 return MPM;
1527}
1528
1529ModulePassManager
1530PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1531 bool LTOPreLink) {
1532 if (Level == OptimizationLevel::O0)
1533 return buildO0DefaultPipeline(Level, LTOPreLink);
1534
1535 ModulePassManager MPM;
1536
1537 // Convert @llvm.global.annotations to !annotation metadata.
1538 MPM.addPass(Pass: Annotation2MetadataPass());
1539
1540 // Force any function attributes we want the rest of the pipeline to observe.
1541 MPM.addPass(Pass: ForceFunctionAttrsPass());
1542
1543 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1544 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1545
1546 // Apply module pipeline start EP callback.
1547 invokePipelineStartEPCallbacks(MPM, Level);
1548
1549 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1550 ? ThinOrFullLTOPhase::FullLTOPreLink
1551 : ThinOrFullLTOPhase::None;
1552 // Add the core simplification pipeline.
1553 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase: LTOPhase));
1554
1555 // Now add the optimization pipeline.
1556 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase));
1557
1558 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1559 PGOOpt->Action == PGOOptions::SampleUse)
1560 MPM.addPass(Pass: PseudoProbeUpdatePass());
1561
1562 // Emit annotation remarks.
1563 addAnnotationRemarksPass(MPM);
1564
1565 if (LTOPreLink)
1566 addRequiredLTOPreLinkPasses(MPM);
1567 return MPM;
1568}
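// Example: a minimal sketch of how a driver typically consumes this pipeline.
// The local names below (PB, LAM, FAM, CGAM, MAM, M) are illustrative and are
// not part of this file:
//
//   PassBuilder PB;
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2,
//                                        /*LTOPreLink=*/false);
//   MPM.run(M, MAM); // M is the llvm::Module being optimized.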
1569
1570ModulePassManager
1571PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1572 bool EmitSummary) {
1573 ModulePassManager MPM;
1574 if (ThinLTO)
1575 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1576 else
1577 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1578 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1579
1580 // Use the ThinLTO post-link pipeline with sample profiling
1581 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1582 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1583 else {
1584 // Otherwise, just use the module optimization pipeline.
1585 MPM.addPass(
1586 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1587 // Emit annotation remarks.
1588 addAnnotationRemarksPass(MPM);
1589 }
1590 return MPM;
1591}
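// The FatLTO pipeline above supports "fat" LTO objects (e.g. clang's
// -ffat-lto-objects): it runs the (Thin)LTO pre-link pipeline, embeds the
// module's bitcode into the object via EmbedBitcodePass, and then finishes
// the non-LTO half of the object with the regular module optimization
// pipeline (or the ThinLTO post-link pipeline when sample profiling is used).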
1592
1593ModulePassManager
1594PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1595 if (Level == OptimizationLevel::O0)
1596 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1597
1598 ModulePassManager MPM;
1599
1600 // Convert @llvm.global.annotations to !annotation metadata.
1601 MPM.addPass(Pass: Annotation2MetadataPass());
1602
1603 // Force any function attributes we want the rest of the pipeline to observe.
1604 MPM.addPass(Pass: ForceFunctionAttrsPass());
1605
1606 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1607 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1608
1609 // Apply module pipeline start EP callback.
1610 invokePipelineStartEPCallbacks(MPM, Level);
1611
1612 // If we are planning to perform ThinLTO later, we don't bloat the code with
1613 // unrolling/vectorization/... now. Just simplify the module as much as we
1614 // can.
1615 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1616 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1617
1618 // Run partial inlining pass to partially inline functions that have
1619 // large bodies.
1620 // FIXME: It isn't clear whether this is really the right place to run this
1621 // in ThinLTO. Because there is another canonicalization and simplification
1622 // phase that will run after the thin link, running this here ends up with
1623 // less information than will be available later and it may grow functions in
1624 // ways that aren't beneficial.
1625 if (RunPartialInlining)
1626 MPM.addPass(Pass: PartialInlinerPass());
1627
1628 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1629 PGOOpt->Action == PGOOptions::SampleUse)
1630 MPM.addPass(Pass: PseudoProbeUpdatePass());
1631
1632 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. The
1633 // actual optimization is going to be done in the PostLink stage, but clang
1634 // can't add callbacks there in the case of in-process ThinLTO called by the linker.
1635 invokeOptimizerEarlyEPCallbacks(MPM, Level);
1636 invokeOptimizerLastEPCallbacks(MPM, Level);
1637
1638 // Emit annotation remarks.
1639 addAnnotationRemarksPass(MPM);
1640
1641 addRequiredLTOPreLinkPasses(MPM);
1642
1643 return MPM;
1644}
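// This pre-link pipeline corresponds to the "thinlto-pre-link<O*>" alias
// understood by PassBuilder's textual pipeline parser (e.g.
// opt -passes='thinlto-pre-link<O2>'); buildThinLTODefaultPipeline below
// provides the matching "thinlto<O*>" post-link pipeline.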
1645
1646ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1647 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1648 ModulePassManager MPM;
1649
1650 if (ImportSummary) {
1651 // For ThinLTO we must apply the context disambiguation decisions early, to
1652 // ensure we can correctly match the callsites to summary data.
1653 if (EnableMemProfContextDisambiguation)
1654 MPM.addPass(Pass: MemProfContextDisambiguation(ImportSummary));
1655
1656 // These passes import type identifier resolutions for whole-program
1657 // devirtualization and CFI. They must run early because other passes may
1658 // disturb the specific instruction patterns that these passes look for,
1659 // creating dependencies on resolutions that may not appear in the summary.
1660 //
1661 // For example, GVN may transform the pattern assume(type.test) appearing in
1662 // two basic blocks into assume(phi(type.test, type.test)), which would
1663 // transform a dependency on a WPD resolution into a dependency on a type
1664 // identifier resolution for CFI.
1665 //
1666 // Also, WPD has access to more precise information than ICP and can
1667 // devirtualize more effectively, so it should operate on the IR first.
1668 //
1669 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1670 // metadata and intrinsics.
1671 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1672 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1673 }
1674
1675 if (Level == OptimizationLevel::O0) {
1676 // Run a second time to clean up any type tests left behind by WPD for use
1677 // in ICP.
1678 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1679 // Drop available_externally and unreferenced globals. This is necessary
1680 // with ThinLTO in order to avoid leaving undefined references to dead
1681 // globals in the object file.
1682 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1683 MPM.addPass(Pass: GlobalDCEPass());
1684 return MPM;
1685 }
1686
1687 // Add the core simplification pipeline.
1688 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1689 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1690
1691 // Now add the optimization pipeline.
1692 MPM.addPass(Pass: buildModuleOptimizationPipeline(
1693 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1694
1695 // Emit annotation remarks.
1696 addAnnotationRemarksPass(MPM);
1697
1698 return MPM;
1699}
1700
1701ModulePassManager
1702PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1703 // FIXME: We should use a customized pre-link pipeline!
1704 return buildPerModuleDefaultPipeline(Level,
1705 /* LTOPreLink */ true);
1706}
1707
1708ModulePassManager
1709PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1710 ModuleSummaryIndex *ExportSummary) {
1711 ModulePassManager MPM;
1712
1713 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1714
1715 // Create a function that performs CFI checks for cross-DSO calls with targets
1716 // in the current module.
1717 MPM.addPass(Pass: CrossDSOCFIPass());
1718
1719 if (Level == OptimizationLevel::O0) {
1720 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1721 // metadata and intrinsics.
1722 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1723 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1724 // Run a second time to clean up any type tests left behind by WPD for use
1725 // in ICP.
1726 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1727
1728 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1729
1730 // Emit annotation remarks.
1731 addAnnotationRemarksPass(MPM);
1732
1733 return MPM;
1734 }
1735
1736 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1737 // Load sample profile before running the LTO optimization pipeline.
1738 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1739 PGOOpt->ProfileRemappingFile,
1740 ThinOrFullLTOPhase::FullLTOPostLink));
1741 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1742 // RequireAnalysisPass for PSI before subsequent non-module passes.
1743 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1744 }
1745
1746 // Try to run OpenMP optimizations; this is a quick no-op if no OpenMP metadata is present.
1747 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1748
1749 // Remove unused virtual tables to improve the quality of code generated by
1750 // whole-program devirtualization and bitset lowering.
1751 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
1752
1753 // Do basic inference of function attributes from known properties of system
1754 // libraries and other oracles.
1755 MPM.addPass(Pass: InferFunctionAttrsPass());
1756
1757 if (Level.getSpeedupLevel() > 1) {
1758 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1759 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1760
1761 // Indirect call promotion. This should promote all the targets that are
1762 // left by the earlier promotion pass that promotes intra-module targets.
1763 // This two-step promotion saves compile time. For LTO, it should
1764 // produce the same result as if we only did promotion here.
1765 MPM.addPass(Pass: PGOIndirectCallPromotion(
1766 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1767
1768 // Propagate constants at call sites into the functions they call. This
1769 // opens opportunities for globalopt (and inlining) by substituting function
1770 // pointers passed as arguments to direct uses of functions.
1771 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1772 Level != OptimizationLevel::Os &&
1773 Level != OptimizationLevel::Oz)));
1774
1775 // Attach metadata to indirect call sites indicating the set of functions
1776 // they may target at run-time. This should follow IPSCCP.
1777 MPM.addPass(Pass: CalledValuePropagationPass());
1778 }
1779
1780 // Now deduce any function attributes based on the current code.
1781 MPM.addPass(
1782 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
1783
1784 // Do RPO function attribute inference across the module to forward-propagate
1785 // attributes where applicable.
1786 // FIXME: Is this really an optimization rather than a canonicalization?
1787 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1788
1789 // Use in-range annotations on GEP indices to split globals where beneficial.
1790 MPM.addPass(Pass: GlobalSplitPass());
1791
1792 // Run whole-program optimization of virtual calls when the list of callees
1793 // is fixed.
1794 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1795
1796 // Stop here at -O1.
1797 if (Level == OptimizationLevel::O1) {
1798 // The LowerTypeTestsPass needs to run to lower type metadata and the
1799 // type.test intrinsics. The pass does nothing if CFI is disabled.
1800 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1801 // Run a second time to clean up any type tests left behind by WPD for use
1802 // in ICP (which is performed earlier than this in the regular LTO
1803 // pipeline).
1804 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1805
1806 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1807
1808 // Emit annotation remarks.
1809 addAnnotationRemarksPass(MPM);
1810
1811 return MPM;
1812 }
1813
1814 // Optimize globals to try and fold them into constants.
1815 MPM.addPass(Pass: GlobalOptPass());
1816
1817 // Promote any localized globals to SSA registers.
1818 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
1819
1820 // Linking modules together can lead to duplicate global constants; only
1821 // keep one copy of each constant.
1822 MPM.addPass(Pass: ConstantMergePass());
1823
1824 // Remove unused arguments from functions.
1825 MPM.addPass(Pass: DeadArgumentEliminationPass());
1826
1827 // Reduce the code after globalopt and ipsccp. Both can open up significant
1828 // simplification opportunities, and both can propagate functions through
1829 // function pointers. When this happens, we often have to resolve varargs
1830 // calls, etc, so let instcombine do this.
1831 FunctionPassManager PeepholeFPM;
1832 PeepholeFPM.addPass(Pass: InstCombinePass());
1833 if (Level.getSpeedupLevel() > 1)
1834 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
1835 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
1836
1837 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
1838 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1839
1840 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1841 // generally clean up exception handling overhead. It isn't clear this is
1842 // valuable as the inliner doesn't currently care whether it is inlining an
1843 // invoke or a call.
1844 // Run the inliner now.
1845 if (EnableModuleInliner) {
1846 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1847 UseInlineAdvisor,
1848 ThinOrFullLTOPhase::FullLTOPostLink));
1849 } else {
1850 MPM.addPass(Pass: ModuleInlinerWrapperPass(
1851 getInlineParamsFromOptLevel(Level),
1852 /* MandatoryFirst */ true,
1853 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
1854 .Pass: InlinePass::CGSCCInliner}));
1855 }
1856
1857 // Perform context disambiguation after inlining, since that would reduce the
1858 // amount of additional cloning required to distinguish the allocation
1859 // contexts.
1860 if (EnableMemProfContextDisambiguation)
1861 MPM.addPass(Pass: MemProfContextDisambiguation());
1862
1863 // Optimize globals again after we ran the inliner.
1864 MPM.addPass(Pass: GlobalOptPass());
1865
1866 // Run the OpenMPOpt pass again after global optimizations.
1867 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1868
1869 // Garbage collect dead functions.
1870 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
1871
1872 // If we didn't decide to inline a function, check to see if we can
1873 // transform it to pass arguments by value instead of by reference.
1874 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: ArgumentPromotionPass()));
1875
1876 FunctionPassManager FPM;
1877 // The IPO Passes may leave cruft around. Clean up after them.
1878 FPM.addPass(Pass: InstCombinePass());
1879 invokePeepholeEPCallbacks(FPM, Level);
1880
1881 if (EnableConstraintElimination)
1882 FPM.addPass(Pass: ConstraintEliminationPass());
1883
1884 FPM.addPass(Pass: JumpThreadingPass());
1885
1886 // Do a post-inline PGO instrumentation and use pass. This is a
1887 // context-sensitive PGO pass.
1888 if (PGOOpt) {
1889 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1890 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1891 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1892 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1893 FS: PGOOpt->FS);
1894 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1895 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1896 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1897 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1898 FS: PGOOpt->FS);
1899 }
1900
1901 // Break up allocas
1902 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1903
1904 // LTO provides additional opportunities for tail-call elimination due to
1905 // link-time inlining and visibility of the nocapture attribute.
1906 FPM.addPass(Pass: TailCallElimPass());
1907
1908 // Run a few AA-driven optimizations here and now to clean up the code.
1909 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
1910 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1911
1912 MPM.addPass(
1913 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
1914
1915 // Require the GlobalsAA analysis for the module so we can query it within
1916 // MainFPM.
1917 if (EnableGlobalAnalyses) {
1918 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
1919 // Invalidate AAManager so it can be recreated and pick up the newly
1920 // available GlobalsAA.
1921 MPM.addPass(
1922 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
1923 }
1924
1925 FunctionPassManager MainFPM;
1926 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1927 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1928 /*AllowSpeculation=*/true),
1929 /*UseMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
1930
1931 if (RunNewGVN)
1932 MainFPM.addPass(Pass: NewGVNPass());
1933 else
1934 MainFPM.addPass(Pass: GVNPass());
1935
1936 // Remove dead memcpy()'s.
1937 MainFPM.addPass(Pass: MemCpyOptPass());
1938
1939 // Nuke dead stores.
1940 MainFPM.addPass(Pass: DSEPass());
1941 MainFPM.addPass(Pass: MoveAutoInitPass());
1942 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
1943
1944 LoopPassManager LPM;
1945 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1946 LPM.addPass(Pass: LoopFlattenPass());
1947 LPM.addPass(Pass: IndVarSimplifyPass());
1948 LPM.addPass(Pass: LoopDeletionPass());
1949 // FIXME: Add loop interchange.
1950
1951 // Unroll small loops and perform peeling.
1952 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
1953 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1954 PTO.ForgetAllSCEVInLoopUnroll));
1955 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1956 // *All* loop passes must preserve it, in order to be able to use it.
1957 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1958 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1959
1960 MainFPM.addPass(Pass: LoopDistributePass());
1961
1962 addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true);
1963
1964 // Run the OpenMPOpt CGSCC pass again late.
1965 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1966 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
1967
1968 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
1969 MainFPM.addPass(Pass: JumpThreadingPass());
1970 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
1971 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1972
1973 // Lower type metadata and the type.test intrinsic. This pass supports
1974 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1975 // to be run at link time if CFI is enabled. This pass does nothing if
1976 // CFI is disabled.
1977 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1978 // Run a second time to clean up any type tests left behind by WPD for use
1979 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1980 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1981
1982 // Enable splitting late in the FullLTO post-link pipeline.
1983 if (EnableHotColdSplit)
1984 MPM.addPass(Pass: HotColdSplittingPass());
1985
1986 // Add late LTO optimization passes.
1987 FunctionPassManager LateFPM;
1988
1989 // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
1990 // canonicalization pass that enables other optimizations. As a result,
1991 // LoopSink needs to be a very late IR pass to avoid undoing the LICM
1992 // results too early.
1993 LateFPM.addPass(Pass: LoopSinkPass());
1994
1995 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1996 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1997 // flattening of blocks.
1998 LateFPM.addPass(Pass: DivRemPairsPass());
1999
2000 // Delete basic blocks, which optimization passes may have killed.
2001 LateFPM.addPass(Pass: SimplifyCFGPass(
2002 SimplifyCFGOptions().convertSwitchRangeToICmp(B: true).hoistCommonInsts(
2003 B: true)));
2004 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2005
2006 // Drop bodies of available externally objects to improve GlobalDCE.
2007 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2008
2009 // Now that we have optimized the program, discard unreachable functions.
2010 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2011
2012 if (PTO.MergeFunctions)
2013 MPM.addPass(Pass: MergeFunctionsPass());
2014
2015 if (PTO.CallGraphProfile)
2016 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2017
2018 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2019
2020 // Emit annotation remarks.
2021 addAnnotationRemarksPass(MPM);
2022
2023 return MPM;
2024}
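// This is the full LTO post-link pipeline that LTO backends run over the
// merged module; it is also reachable via the "lto<O*>" textual pipeline
// alias, with "lto-pre-link<O*>" mapping to buildLTOPreLinkDefaultPipeline
// above.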
2025
2026ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2027 bool LTOPreLink) {
2028 assert(Level == OptimizationLevel::O0 &&
2029 "buildO0DefaultPipeline should only be used with O0");
2030
2031 ModulePassManager MPM;
2032
2033 // Perform pseudo probe instrumentation in O0 mode. This is for consistency
2034 // between different build modes. For example, an LTO build can be mixed
2035 // with an O0 prelink and an O2 postlink. Loading a sample profile in
2036 // the postlink will require pseudo probe instrumentation in the prelink.
2037 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2038 MPM.addPass(Pass: SampleProfileProbePass(TM));
2039
2040 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2041 PGOOpt->Action == PGOOptions::IRUse))
2042 addPGOInstrPassesForO0(
2043 MPM,
2044 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2045 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2046 ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS);
2047
2048 invokePipelineStartEPCallbacks(MPM, Level);
2049
2050 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2051 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2052
2053 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
2054
2055 // Build a minimal pipeline based on the semantics required by LLVM,
2056 // which is just that always inlining occurs. Further, disable generating
2057 // lifetime intrinsics to avoid enabling further optimizations during
2058 // code generation.
2059 MPM.addPass(Pass: AlwaysInlinerPass(
2060 /*InsertLifetimeIntrinsics=*/false));
2061
2062 if (PTO.MergeFunctions)
2063 MPM.addPass(Pass: MergeFunctionsPass());
2064
2065 if (EnableMatrix)
2066 MPM.addPass(
2067 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2068
2069 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2070 CGSCCPassManager CGPM;
2071 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2072 if (!CGPM.isEmpty())
2073 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2074 }
2075 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2076 LoopPassManager LPM;
2077 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2078 if (!LPM.isEmpty()) {
2079 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2080 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2081 }
2082 }
2083 if (!LoopOptimizerEndEPCallbacks.empty()) {
2084 LoopPassManager LPM;
2085 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2086 if (!LPM.isEmpty()) {
2087 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2088 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2089 }
2090 }
2091 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2092 FunctionPassManager FPM;
2093 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2094 if (!FPM.isEmpty())
2095 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2096 }
2097
2098 invokeOptimizerEarlyEPCallbacks(MPM, Level);
2099
2100 if (!VectorizerStartEPCallbacks.empty()) {
2101 FunctionPassManager FPM;
2102 invokeVectorizerStartEPCallbacks(FPM, Level);
2103 if (!FPM.isEmpty())
2104 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2105 }
2106
2107 ModulePassManager CoroPM;
2108 CoroPM.addPass(Pass: CoroEarlyPass());
2109 CGSCCPassManager CGPM;
2110 CGPM.addPass(Pass: CoroSplitPass());
2111 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2112 CoroPM.addPass(Pass: CoroCleanupPass());
2113 CoroPM.addPass(Pass: GlobalDCEPass());
2114 MPM.addPass(Pass: CoroConditionalWrapper(std::move(CoroPM)));
2115
2116 invokeOptimizerLastEPCallbacks(MPM, Level);
2117
2118 if (LTOPreLink)
2119 addRequiredLTOPreLinkPasses(MPM);
2120
2121 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
2122
2123 return MPM;
2124}
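// Even at -O0 this pipeline is not empty: always-inlining and coroutine
// lowering (via CoroConditionalWrapper) are required for correctness, and the
// extension-point callbacks still run so that plugins and sanitizers can add
// their passes. Textually, this is essentially what "default<O0>" expands to.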
2125
2126AAManager PassBuilder::buildDefaultAAPipeline() {
2127 AAManager AA;
2128
2129 // The order in which these are registered determines their priority when
2130 // being queried.
2131
2132 // First we register the basic alias analysis that provides the majority of
2133 // per-function local AA logic. This is a stateless, on-demand local set of
2134 // AA techniques.
2135 AA.registerFunctionAnalysis<BasicAA>();
2136
2137 // Next we query fast, specialized alias analyses that wrap IR-embedded
2138 // information about aliasing.
2139 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2140 AA.registerFunctionAnalysis<TypeBasedAA>();
2141
2142 // Add support for querying global aliasing information when available.
2143 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2144 // analysis, all that the `AAManager` can do is query for any *cached*
2145 // results from `GlobalsAA` through a readonly proxy.
2146 if (EnableGlobalAnalyses)
2147 AA.registerModuleAnalysis<GlobalsAA>();
2148
2149 // Add target-specific alias analyses.
2150 if (TM)
2151 TM->registerDefaultAliasAnalyses(AA);
2152
2153 return AA;
2154}
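// Example: a hedged sketch of installing this AA pipeline into a
// FunctionAnalysisManager explicitly (useful when customizing the AA stack);
// `PB` and `FAM` are illustrative local names, not part of this file:
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });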
2155
