//===- Construction of pass pipelines -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file provides the implementation of the PassBuilder based on our
/// static pass registry as well as related functionality. It also provides
/// helpers to aid in analyzing, debugging, and testing passes and pass
/// pipelines.
///
//===----------------------------------------------------------------------===//
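//
// Illustrative sketch (not part of this file's logic; all names shown are
// standard PassBuilder APIs): an embedder typically consumes the pipelines
// built here roughly as follows, assuming the analysis managers have been
// created elsewhere:
//
//   PassBuilder PB(TM);
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2);
//   MPM.run(M, MAM);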
16
17#include "llvm/ADT/Statistic.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/BasicAliasAnalysis.h"
20#include "llvm/Analysis/CGSCCPassManager.h"
21#include "llvm/Analysis/GlobalsModRef.h"
22#include "llvm/Analysis/InlineAdvisor.h"
23#include "llvm/Analysis/ProfileSummaryInfo.h"
24#include "llvm/Analysis/ScopedNoAliasAA.h"
25#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
26#include "llvm/IR/PassManager.h"
27#include "llvm/Passes/OptimizationLevel.h"
28#include "llvm/Passes/PassBuilder.h"
29#include "llvm/Support/CommandLine.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/PGOOptions.h"
32#include "llvm/Support/VirtualFileSystem.h"
33#include "llvm/Target/TargetMachine.h"
34#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
35#include "llvm/Transforms/Coroutines/CoroCleanup.h"
36#include "llvm/Transforms/Coroutines/CoroConditionalWrapper.h"
37#include "llvm/Transforms/Coroutines/CoroEarly.h"
38#include "llvm/Transforms/Coroutines/CoroElide.h"
39#include "llvm/Transforms/Coroutines/CoroSplit.h"
40#include "llvm/Transforms/HipStdPar/HipStdPar.h"
41#include "llvm/Transforms/IPO/AlwaysInliner.h"
42#include "llvm/Transforms/IPO/Annotation2Metadata.h"
43#include "llvm/Transforms/IPO/ArgumentPromotion.h"
44#include "llvm/Transforms/IPO/Attributor.h"
45#include "llvm/Transforms/IPO/CalledValuePropagation.h"
46#include "llvm/Transforms/IPO/ConstantMerge.h"
47#include "llvm/Transforms/IPO/CrossDSOCFI.h"
48#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
49#include "llvm/Transforms/IPO/ElimAvailExtern.h"
50#include "llvm/Transforms/IPO/EmbedBitcodePass.h"
51#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
52#include "llvm/Transforms/IPO/FunctionAttrs.h"
53#include "llvm/Transforms/IPO/GlobalDCE.h"
54#include "llvm/Transforms/IPO/GlobalOpt.h"
55#include "llvm/Transforms/IPO/GlobalSplit.h"
56#include "llvm/Transforms/IPO/HotColdSplitting.h"
57#include "llvm/Transforms/IPO/IROutliner.h"
58#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
59#include "llvm/Transforms/IPO/Inliner.h"
60#include "llvm/Transforms/IPO/LowerTypeTests.h"
61#include "llvm/Transforms/IPO/MemProfContextDisambiguation.h"
62#include "llvm/Transforms/IPO/MergeFunctions.h"
63#include "llvm/Transforms/IPO/ModuleInliner.h"
64#include "llvm/Transforms/IPO/OpenMPOpt.h"
65#include "llvm/Transforms/IPO/PartialInlining.h"
66#include "llvm/Transforms/IPO/SCCP.h"
67#include "llvm/Transforms/IPO/SampleProfile.h"
68#include "llvm/Transforms/IPO/SampleProfileProbe.h"
69#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
70#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
71#include "llvm/Transforms/InstCombine/InstCombine.h"
72#include "llvm/Transforms/Instrumentation/CGProfile.h"
73#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
74#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
75#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
76#include "llvm/Transforms/Instrumentation/MemProfiler.h"
77#include "llvm/Transforms/Instrumentation/PGOForceFunctionAttrs.h"
78#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
79#include "llvm/Transforms/Scalar/ADCE.h"
80#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
81#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
82#include "llvm/Transforms/Scalar/BDCE.h"
83#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
84#include "llvm/Transforms/Scalar/ConstraintElimination.h"
85#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
86#include "llvm/Transforms/Scalar/DFAJumpThreading.h"
87#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
88#include "llvm/Transforms/Scalar/DivRemPairs.h"
89#include "llvm/Transforms/Scalar/EarlyCSE.h"
90#include "llvm/Transforms/Scalar/Float2Int.h"
91#include "llvm/Transforms/Scalar/GVN.h"
92#include "llvm/Transforms/Scalar/IndVarSimplify.h"
93#include "llvm/Transforms/Scalar/InferAlignment.h"
94#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
95#include "llvm/Transforms/Scalar/JumpTableToSwitch.h"
96#include "llvm/Transforms/Scalar/JumpThreading.h"
97#include "llvm/Transforms/Scalar/LICM.h"
98#include "llvm/Transforms/Scalar/LoopDeletion.h"
99#include "llvm/Transforms/Scalar/LoopDistribute.h"
100#include "llvm/Transforms/Scalar/LoopFlatten.h"
101#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
102#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
103#include "llvm/Transforms/Scalar/LoopInterchange.h"
104#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
105#include "llvm/Transforms/Scalar/LoopPassManager.h"
106#include "llvm/Transforms/Scalar/LoopRotation.h"
107#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
108#include "llvm/Transforms/Scalar/LoopSink.h"
109#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
110#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
111#include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
112#include "llvm/Transforms/Scalar/LowerConstantIntrinsics.h"
113#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
114#include "llvm/Transforms/Scalar/LowerMatrixIntrinsics.h"
115#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
116#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
117#include "llvm/Transforms/Scalar/NewGVN.h"
118#include "llvm/Transforms/Scalar/Reassociate.h"
119#include "llvm/Transforms/Scalar/SCCP.h"
120#include "llvm/Transforms/Scalar/SROA.h"
121#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
122#include "llvm/Transforms/Scalar/SimplifyCFG.h"
123#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
124#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
125#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
126#include "llvm/Transforms/Utils/AddDiscriminators.h"
127#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
128#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
129#include "llvm/Transforms/Utils/CountVisits.h"
130#include "llvm/Transforms/Utils/InjectTLIMappings.h"
131#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
132#include "llvm/Transforms/Utils/Mem2Reg.h"
133#include "llvm/Transforms/Utils/MoveAutoInit.h"
134#include "llvm/Transforms/Utils/NameAnonGlobals.h"
135#include "llvm/Transforms/Utils/RelLookupTableConverter.h"
136#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
137#include "llvm/Transforms/Vectorize/LoopVectorize.h"
138#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
139#include "llvm/Transforms/Vectorize/VectorCombine.h"
140
141using namespace llvm;
142
143static cl::opt<InliningAdvisorMode> UseInlineAdvisor(
144 "enable-ml-inliner", cl::init(Val: InliningAdvisorMode::Default), cl::Hidden,
145 cl::desc("Enable ML policy for inliner. Currently trained for -Oz only"),
146 cl::values(clEnumValN(InliningAdvisorMode::Default, "default",
147 "Heuristics-based inliner version"),
148 clEnumValN(InliningAdvisorMode::Development, "development",
149 "Use development mode (runtime-loadable model)"),
150 clEnumValN(InliningAdvisorMode::Release, "release",
151 "Use release mode (AOT-compiled model)")));
152
153static cl::opt<bool> EnableSyntheticCounts(
154 "enable-npm-synthetic-counts", cl::Hidden,
155 cl::desc("Run synthetic function entry count generation "
156 "pass"));
157
158/// Flag to enable inline deferral during PGO.
159static cl::opt<bool>
160 EnablePGOInlineDeferral("enable-npm-pgo-inline-deferral", cl::init(Val: true),
161 cl::Hidden,
162 cl::desc("Enable inline deferral during PGO"));
163
164static cl::opt<bool> EnableModuleInliner("enable-module-inliner",
165 cl::init(Val: false), cl::Hidden,
166 cl::desc("Enable module inliner"));
167
168static cl::opt<bool> PerformMandatoryInliningsFirst(
169 "mandatory-inlining-first", cl::init(Val: false), cl::Hidden,
170 cl::desc("Perform mandatory inlinings module-wide, before performing "
171 "inlining"));
172
173static cl::opt<bool> EnableEagerlyInvalidateAnalyses(
174 "eagerly-invalidate-analyses", cl::init(Val: true), cl::Hidden,
175 cl::desc("Eagerly invalidate more analyses in default pipelines"));
176
177static cl::opt<bool> EnableMergeFunctions(
178 "enable-merge-functions", cl::init(Val: false), cl::Hidden,
179 cl::desc("Enable function merging as part of the optimization pipeline"));
180
181static cl::opt<bool> EnablePostPGOLoopRotation(
182 "enable-post-pgo-loop-rotation", cl::init(Val: true), cl::Hidden,
183 cl::desc("Run the loop rotation transformation after PGO instrumentation"));
184
185static cl::opt<bool> EnableGlobalAnalyses(
186 "enable-global-analyses", cl::init(Val: true), cl::Hidden,
187 cl::desc("Enable inter-procedural analyses"));
188
static cl::opt<bool>
    RunPartialInlining("enable-partial-inlining", cl::init(false), cl::Hidden,
                       cl::desc("Run partial inlining pass"));
192
193static cl::opt<bool> ExtraVectorizerPasses(
194 "extra-vectorizer-passes", cl::init(Val: false), cl::Hidden,
195 cl::desc("Run cleanup optimization passes after vectorization"));
196
197static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(Val: false), cl::Hidden,
198 cl::desc("Run the NewGVN pass"));
199
200static cl::opt<bool> EnableLoopInterchange(
201 "enable-loopinterchange", cl::init(Val: false), cl::Hidden,
202 cl::desc("Enable the experimental LoopInterchange Pass"));
203
204static cl::opt<bool> EnableUnrollAndJam("enable-unroll-and-jam",
205 cl::init(Val: false), cl::Hidden,
206 cl::desc("Enable Unroll And Jam Pass"));
207
208static cl::opt<bool> EnableLoopFlatten("enable-loop-flatten", cl::init(Val: false),
209 cl::Hidden,
210 cl::desc("Enable the LoopFlatten Pass"));
211
212// Experimentally allow loop header duplication. This should allow for better
213// optimization at Oz, since loop-idiom recognition can then recognize things
214// like memcpy. If this ends up being useful for many targets, we should drop
215// this flag and make a code generation option that can be controlled
216// independent of the opt level and exposed through the frontend.
217static cl::opt<bool> EnableLoopHeaderDuplication(
218 "enable-loop-header-duplication", cl::init(Val: false), cl::Hidden,
219 cl::desc("Enable loop header duplication at any optimization level"));
220
221static cl::opt<bool>
222 EnableDFAJumpThreading("enable-dfa-jump-thread",
223 cl::desc("Enable DFA jump threading"),
224 cl::init(Val: false), cl::Hidden);
225
226// TODO: turn on and remove flag
227static cl::opt<bool> EnablePGOForceFunctionAttrs(
228 "enable-pgo-force-function-attrs",
229 cl::desc("Enable pass to set function attributes based on PGO profiles"),
230 cl::init(Val: false));
231
232static cl::opt<bool>
233 EnableHotColdSplit("hot-cold-split",
234 cl::desc("Enable hot-cold splitting pass"));
235
236static cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(Val: false),
237 cl::Hidden,
238 cl::desc("Enable ir outliner pass"));
239
240static cl::opt<bool>
241 DisablePreInliner("disable-preinline", cl::init(Val: false), cl::Hidden,
242 cl::desc("Disable pre-instrumentation inliner"));
243
244static cl::opt<int> PreInlineThreshold(
245 "preinline-threshold", cl::Hidden, cl::init(Val: 75),
246 cl::desc("Control the amount of inlining in pre-instrumentation inliner "
247 "(default = 75)"));
248
249static cl::opt<bool>
250 EnableGVNHoist("enable-gvn-hoist",
251 cl::desc("Enable the GVN hoisting pass (default = off)"));
252
253static cl::opt<bool>
254 EnableGVNSink("enable-gvn-sink",
255 cl::desc("Enable the GVN sinking pass (default = off)"));
256
257static cl::opt<bool> EnableJumpTableToSwitch(
258 "enable-jump-table-to-switch",
259 cl::desc("Enable JumpTableToSwitch pass (default = off)"));
260
// This option is used to simplify testing of SampleFDO optimizations for
// profile loading.
static cl::opt<bool>
    EnableCHR("enable-chr", cl::init(true), cl::Hidden,
              cl::desc("Enable control height reduction optimization (CHR)"));

static cl::opt<bool> FlattenedProfileUsed(
    "flattened-profile-used", cl::init(false), cl::Hidden,
    cl::desc("Indicate the sample profile being used is flattened, i.e., "
             "no inline hierarchy exists in the profile"));
271
272static cl::opt<bool> EnableOrderFileInstrumentation(
273 "enable-order-file-instrumentation", cl::init(Val: false), cl::Hidden,
274 cl::desc("Enable order file instrumentation (default = off)"));
275
276static cl::opt<bool>
277 EnableMatrix("enable-matrix", cl::init(Val: false), cl::Hidden,
278 cl::desc("Enable lowering of the matrix intrinsics"));
279
280static cl::opt<bool> EnableConstraintElimination(
281 "enable-constraint-elimination", cl::init(Val: true), cl::Hidden,
282 cl::desc(
283 "Enable pass to eliminate conditions based on linear constraints"));
284
285static cl::opt<AttributorRunOption> AttributorRun(
286 "attributor-enable", cl::Hidden, cl::init(Val: AttributorRunOption::NONE),
287 cl::desc("Enable the attributor inter-procedural deduction pass"),
288 cl::values(clEnumValN(AttributorRunOption::ALL, "all",
289 "enable all attributor runs"),
290 clEnumValN(AttributorRunOption::MODULE, "module",
291 "enable module-wide attributor runs"),
292 clEnumValN(AttributorRunOption::CGSCC, "cgscc",
293 "enable call graph SCC attributor runs"),
294 clEnumValN(AttributorRunOption::NONE, "none",
295 "disable attributor runs")));
296
297static cl::opt<bool> UseLoopVersioningLICM(
298 "enable-loop-versioning-licm", cl::init(Val: false), cl::Hidden,
299 cl::desc("Enable the experimental Loop Versioning LICM pass"));
300
301namespace llvm {
302extern cl::opt<bool> EnableMemProfContextDisambiguation;
303
304extern cl::opt<bool> EnableInferAlignmentPass;
305} // namespace llvm
306
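// Default pipeline tuning knobs. Frontends can override these before they
// construct a PassBuilder; the values below are only the fallback defaults.
// Illustrative sketch (assumes a TargetMachine *TM is available):
//   PipelineTuningOptions PTO;
//   PTO.SLPVectorization = true; // opt in to SLP vectorization
//   PassBuilder PB(TM, PTO);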
PipelineTuningOptions::PipelineTuningOptions() {
  LoopInterleaving = true;
  LoopVectorization = true;
  SLPVectorization = false;
  LoopUnrolling = true;
  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
  LicmMssaOptCap = SetLicmMssaOptCap;
  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
  CallGraphProfile = true;
  UnifiedLTO = false;
  MergeFunctions = EnableMergeFunctions;
  InlinerThreshold = -1;
  EagerlyInvalidateAnalyses = EnableEagerlyInvalidateAnalyses;
}
321
322namespace llvm {
323extern cl::opt<unsigned> MaxDevirtIterations;
324} // namespace llvm
325
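// The invoke*EPCallbacks helpers below run, in registration order, every
// callback a client attached to the corresponding extension point. An
// illustrative registration (sketch; MyPeepholePass is a hypothetical pass):
//   PB.registerPeepholeEPCallback(
//       [](FunctionPassManager &FPM, OptimizationLevel Level) {
//         FPM.addPass(MyPeepholePass());
//       });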
void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM,
                                            OptimizationLevel Level) {
  for (auto &C : PeepholeEPCallbacks)
    C(FPM, Level);
}
331void PassBuilder::invokeLateLoopOptimizationsEPCallbacks(
332 LoopPassManager &LPM, OptimizationLevel Level) {
333 for (auto &C : LateLoopOptimizationsEPCallbacks)
334 C(LPM, Level);
335}
336void PassBuilder::invokeLoopOptimizerEndEPCallbacks(LoopPassManager &LPM,
337 OptimizationLevel Level) {
338 for (auto &C : LoopOptimizerEndEPCallbacks)
339 C(LPM, Level);
340}
341void PassBuilder::invokeScalarOptimizerLateEPCallbacks(
342 FunctionPassManager &FPM, OptimizationLevel Level) {
343 for (auto &C : ScalarOptimizerLateEPCallbacks)
344 C(FPM, Level);
345}
346void PassBuilder::invokeCGSCCOptimizerLateEPCallbacks(CGSCCPassManager &CGPM,
347 OptimizationLevel Level) {
348 for (auto &C : CGSCCOptimizerLateEPCallbacks)
349 C(CGPM, Level);
350}
351void PassBuilder::invokeVectorizerStartEPCallbacks(FunctionPassManager &FPM,
352 OptimizationLevel Level) {
353 for (auto &C : VectorizerStartEPCallbacks)
354 C(FPM, Level);
355}
356void PassBuilder::invokeOptimizerEarlyEPCallbacks(ModulePassManager &MPM,
357 OptimizationLevel Level) {
358 for (auto &C : OptimizerEarlyEPCallbacks)
359 C(MPM, Level);
360}
361void PassBuilder::invokeOptimizerLastEPCallbacks(ModulePassManager &MPM,
362 OptimizationLevel Level) {
363 for (auto &C : OptimizerLastEPCallbacks)
364 C(MPM, Level);
365}
366void PassBuilder::invokeFullLinkTimeOptimizationEarlyEPCallbacks(
367 ModulePassManager &MPM, OptimizationLevel Level) {
368 for (auto &C : FullLinkTimeOptimizationEarlyEPCallbacks)
369 C(MPM, Level);
370}
371void PassBuilder::invokeFullLinkTimeOptimizationLastEPCallbacks(
372 ModulePassManager &MPM, OptimizationLevel Level) {
373 for (auto &C : FullLinkTimeOptimizationLastEPCallbacks)
374 C(MPM, Level);
375}
376void PassBuilder::invokePipelineStartEPCallbacks(ModulePassManager &MPM,
377 OptimizationLevel Level) {
378 for (auto &C : PipelineStartEPCallbacks)
379 C(MPM, Level);
380}
381void PassBuilder::invokePipelineEarlySimplificationEPCallbacks(
382 ModulePassManager &MPM, OptimizationLevel Level) {
383 for (auto &C : PipelineEarlySimplificationEPCallbacks)
384 C(MPM, Level);
385}
386
387// Helper to add AnnotationRemarksPass.
388static void addAnnotationRemarksPass(ModulePassManager &MPM) {
389 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
390}
391
392// Helper to check if the current compilation phase is preparing for LTO
393static bool isLTOPreLink(ThinOrFullLTOPhase Phase) {
394 return Phase == ThinOrFullLTOPhase::ThinLTOPreLink ||
395 Phase == ThinOrFullLTOPhase::FullLTOPreLink;
396}
397
398// TODO: Investigate the cost/benefit of tail call elimination on debugging.
399FunctionPassManager
400PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level,
401 ThinOrFullLTOPhase Phase) {
402
403 FunctionPassManager FPM;
404
405 if (AreStatisticsEnabled())
406 FPM.addPass(Pass: CountVisitsPass());
407
408 // Form SSA out of local memory accesses after breaking apart aggregates into
409 // scalars.
410 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
411
412 // Catch trivial redundancies
413 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
414
415 // Hoisting of scalars and load expressions.
416 FPM.addPass(
417 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
418 FPM.addPass(Pass: InstCombinePass());
419
420 FPM.addPass(Pass: LibCallsShrinkWrapPass());
421
422 invokePeepholeEPCallbacks(FPM, Level);
423
424 FPM.addPass(
425 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
426
427 // Form canonically associated expression trees, and simplify the trees using
428 // basic mathematical properties. For example, this will form (nearly)
429 // minimal multiplication trees.
430 FPM.addPass(Pass: ReassociatePass());
431
432 // Add the primary loop simplification pipeline.
433 // FIXME: Currently this is split into two loop pass pipelines because we run
434 // some function passes in between them. These can and should be removed
435 // and/or replaced by scheduling the loop pass equivalents in the correct
436 // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest equivalent we have to
  // `InstCombinePass` is `LoopInstSimplify`.
441 LoopPassManager LPM1, LPM2;
442
443 // Simplify the loop body. We do this initially to clean up after other loop
444 // passes run, either when iterating on a loop or on inner loops with
445 // implications on the outer loop.
446 LPM1.addPass(Pass: LoopInstSimplifyPass());
447 LPM1.addPass(Pass: LoopSimplifyCFGPass());
448
449 // Try to remove as much code from the loop header as possible,
450 // to reduce amount of IR that will have to be duplicated. However,
451 // do not perform speculative hoisting the first time as LICM
452 // will destroy metadata that may not need to be destroyed if run
453 // after loop rotation.
454 // TODO: Investigate promotion cap for O1.
455 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
456 /*AllowSpeculation=*/false));
457
458 LPM1.addPass(Pass: LoopRotatePass(/* Disable header duplication */ true,
459 isLTOPreLink(Phase)));
460 // TODO: Investigate promotion cap for O1.
461 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
462 /*AllowSpeculation=*/true));
463 LPM1.addPass(Pass: SimpleLoopUnswitchPass());
464 if (EnableLoopFlatten)
465 LPM1.addPass(Pass: LoopFlattenPass());
466
467 LPM2.addPass(Pass: LoopIdiomRecognizePass());
468 LPM2.addPass(Pass: IndVarSimplifyPass());
469
470 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
471
472 LPM2.addPass(Pass: LoopDeletionPass());
473
474 if (EnableLoopInterchange)
475 LPM2.addPass(Pass: LoopInterchangePass());
476
  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full-unroll attributes, so we need to make sure the full unroll
  // pass still pays attention to them.
482 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
483 PGOOpt->Action != PGOOptions::SampleUse)
484 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
485 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
486 PTO.ForgetAllSCEVInLoopUnroll));
487
488 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
489
490 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
491 /*UseMemorySSA=*/true,
492 /*UseBlockFrequencyInfo=*/true));
493 FPM.addPass(
494 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
495 FPM.addPass(Pass: InstCombinePass());
496 // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA.
497 // *All* loop passes must preserve it, in order to be able to use it.
498 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
499 /*UseMemorySSA=*/false,
500 /*UseBlockFrequencyInfo=*/false));
501
502 // Delete small array after loop unroll.
503 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
504
505 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
506 FPM.addPass(Pass: MemCpyOptPass());
507
508 // Sparse conditional constant propagation.
509 // FIXME: It isn't clear why we do this *after* loop passes rather than
510 // before...
511 FPM.addPass(Pass: SCCPPass());
512
513 // Delete dead bit computations (instcombine runs after to fold away the dead
514 // computations, and then ADCE will run later to exploit any new DCE
515 // opportunities that creates).
516 FPM.addPass(Pass: BDCEPass());
517
518 // Run instcombine after redundancy and dead bit elimination to exploit
519 // opportunities opened up by them.
520 FPM.addPass(Pass: InstCombinePass());
521 invokePeepholeEPCallbacks(FPM, Level);
522
523 FPM.addPass(Pass: CoroElidePass());
524
525 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
526
527 // Finally, do an expensive DCE pass to catch all the dead code exposed by
528 // the simplifications and basic cleanup after all the simplifications.
529 // TODO: Investigate if this is too expensive.
530 FPM.addPass(Pass: ADCEPass());
531 FPM.addPass(
532 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
533 FPM.addPass(Pass: InstCombinePass());
534 invokePeepholeEPCallbacks(FPM, Level);
535
536 return FPM;
537}
538
539FunctionPassManager
540PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
541 ThinOrFullLTOPhase Phase) {
542 assert(Level != OptimizationLevel::O0 && "Must request optimizations!");
543
544 // The O1 pipeline has a separate pipeline creation function to simplify
545 // construction readability.
546 if (Level.getSpeedupLevel() == 1)
547 return buildO1FunctionSimplificationPipeline(Level, Phase);
548
549 FunctionPassManager FPM;
550
551 if (AreStatisticsEnabled())
552 FPM.addPass(Pass: CountVisitsPass());
553
554 // Form SSA out of local memory accesses after breaking apart aggregates into
555 // scalars.
556 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
557
558 // Catch trivial redundancies
559 FPM.addPass(Pass: EarlyCSEPass(true /* Enable mem-ssa. */));
560 if (EnableKnowledgeRetention)
561 FPM.addPass(Pass: AssumeSimplifyPass());
562
563 // Hoisting of scalars and load expressions.
564 if (EnableGVNHoist)
565 FPM.addPass(Pass: GVNHoistPass());
566
567 // Global value numbering based sinking.
568 if (EnableGVNSink) {
569 FPM.addPass(Pass: GVNSinkPass());
570 FPM.addPass(
571 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
572 }
573
574 // Speculative execution if the target has divergent branches; otherwise nop.
575 FPM.addPass(Pass: SpeculativeExecutionPass(/* OnlyIfDivergentTarget =*/true));
576
577 // Optimize based on known information about branches, and cleanup afterward.
578 FPM.addPass(Pass: JumpThreadingPass());
579 FPM.addPass(Pass: CorrelatedValuePropagationPass());
580
581 // Jump table to switch conversion.
582 if (EnableJumpTableToSwitch)
583 FPM.addPass(Pass: JumpTableToSwitchPass());
584
585 FPM.addPass(
586 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
587 FPM.addPass(Pass: InstCombinePass());
588 FPM.addPass(Pass: AggressiveInstCombinePass());
589
590 if (!Level.isOptimizingForSize())
591 FPM.addPass(Pass: LibCallsShrinkWrapPass());
592
593 invokePeepholeEPCallbacks(FPM, Level);
594
595 // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
596 // using the size value profile. Don't perform this when optimizing for size.
597 if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
598 !Level.isOptimizingForSize())
599 FPM.addPass(Pass: PGOMemOPSizeOpt());
600
601 FPM.addPass(Pass: TailCallElimPass());
602 FPM.addPass(
603 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
604
605 // Form canonically associated expression trees, and simplify the trees using
606 // basic mathematical properties. For example, this will form (nearly)
607 // minimal multiplication trees.
608 FPM.addPass(Pass: ReassociatePass());
609
610 if (EnableConstraintElimination)
611 FPM.addPass(Pass: ConstraintEliminationPass());
612
613 // Add the primary loop simplification pipeline.
614 // FIXME: Currently this is split into two loop pass pipelines because we run
615 // some function passes in between them. These can and should be removed
616 // and/or replaced by scheduling the loop pass equivalents in the correct
617 // positions. But those equivalent passes aren't powerful enough yet.
  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
  // fully replace `SimplifyCFGPass`, and the closest equivalent we have to
  // `InstCombinePass` is `LoopInstSimplify`.
622 LoopPassManager LPM1, LPM2;
623
624 // Simplify the loop body. We do this initially to clean up after other loop
625 // passes run, either when iterating on a loop or on inner loops with
626 // implications on the outer loop.
627 LPM1.addPass(Pass: LoopInstSimplifyPass());
628 LPM1.addPass(Pass: LoopSimplifyCFGPass());
629
630 // Try to remove as much code from the loop header as possible,
631 // to reduce amount of IR that will have to be duplicated. However,
632 // do not perform speculative hoisting the first time as LICM
633 // will destroy metadata that may not need to be destroyed if run
634 // after loop rotation.
635 // TODO: Investigate promotion cap for O1.
636 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
637 /*AllowSpeculation=*/false));
638
639 // Disable header duplication in loop rotation at -Oz.
640 LPM1.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
641 Level != OptimizationLevel::Oz,
642 isLTOPreLink(Phase)));
643 // TODO: Investigate promotion cap for O1.
644 LPM1.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
645 /*AllowSpeculation=*/true));
646 LPM1.addPass(
647 Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3));
648 if (EnableLoopFlatten)
649 LPM1.addPass(Pass: LoopFlattenPass());
650
651 LPM2.addPass(Pass: LoopIdiomRecognizePass());
652 LPM2.addPass(Pass: IndVarSimplifyPass());
653
654 {
655 ExtraSimpleLoopUnswitchPassManager ExtraPasses;
656 ExtraPasses.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
657 OptimizationLevel::O3));
658 LPM2.addPass(Pass: std::move(ExtraPasses));
659 }
660
661 invokeLateLoopOptimizationsEPCallbacks(LPM&: LPM2, Level);
662
663 LPM2.addPass(Pass: LoopDeletionPass());
664
665 if (EnableLoopInterchange)
666 LPM2.addPass(Pass: LoopInterchangePass());
667
  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO,
  // because it changes the IR in ways that make profile annotation in the
  // backend compile inaccurate. The normal unroller doesn't pay attention to
  // forced full-unroll attributes, so we need to make sure the full unroll
  // pass still pays attention to them.
673 if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt ||
674 PGOOpt->Action != PGOOptions::SampleUse)
675 LPM2.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
676 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
677 PTO.ForgetAllSCEVInLoopUnroll));
678
679 invokeLoopOptimizerEndEPCallbacks(LPM&: LPM2, Level);
680
681 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM1),
682 /*UseMemorySSA=*/true,
683 /*UseBlockFrequencyInfo=*/true));
684 FPM.addPass(
685 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
686 FPM.addPass(Pass: InstCombinePass());
687 // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass,
688 // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA.
689 // *All* loop passes must preserve it, in order to be able to use it.
690 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM2),
691 /*UseMemorySSA=*/false,
692 /*UseBlockFrequencyInfo=*/false));
693
694 // Delete small array after loop unroll.
695 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
696
697 // Try vectorization/scalarization transforms that are both improvements
698 // themselves and can allow further folds with GVN and InstCombine.
699 FPM.addPass(Pass: VectorCombinePass(/*TryEarlyFoldsOnly=*/true));
700
701 // Eliminate redundancies.
702 FPM.addPass(Pass: MergedLoadStoreMotionPass());
703 if (RunNewGVN)
704 FPM.addPass(Pass: NewGVNPass());
705 else
706 FPM.addPass(Pass: GVNPass());
707
708 // Sparse conditional constant propagation.
709 // FIXME: It isn't clear why we do this *after* loop passes rather than
710 // before...
711 FPM.addPass(Pass: SCCPPass());
712
713 // Delete dead bit computations (instcombine runs after to fold away the dead
714 // computations, and then ADCE will run later to exploit any new DCE
715 // opportunities that creates).
716 FPM.addPass(Pass: BDCEPass());
717
718 // Run instcombine after redundancy and dead bit elimination to exploit
719 // opportunities opened up by them.
720 FPM.addPass(Pass: InstCombinePass());
721 invokePeepholeEPCallbacks(FPM, Level);
722
723 // Re-consider control flow based optimizations after redundancy elimination,
724 // redo DCE, etc.
725 if (EnableDFAJumpThreading)
726 FPM.addPass(Pass: DFAJumpThreadingPass());
727
728 FPM.addPass(Pass: JumpThreadingPass());
729 FPM.addPass(Pass: CorrelatedValuePropagationPass());
730
731 // Finally, do an expensive DCE pass to catch all the dead code exposed by
732 // the simplifications and basic cleanup after all the simplifications.
733 // TODO: Investigate if this is too expensive.
734 FPM.addPass(Pass: ADCEPass());
735
736 // Specially optimize memory movement as it doesn't look like dataflow in SSA.
737 FPM.addPass(Pass: MemCpyOptPass());
738
739 FPM.addPass(Pass: DSEPass());
740 FPM.addPass(Pass: MoveAutoInitPass());
741
742 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
743 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
744 /*AllowSpeculation=*/true),
745 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
746
747 FPM.addPass(Pass: CoroElidePass());
748
749 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
750
751 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
752 .convertSwitchRangeToICmp(B: true)
753 .hoistCommonInsts(B: true)
754 .sinkCommonInsts(B: true)));
755 FPM.addPass(Pass: InstCombinePass());
756 invokePeepholeEPCallbacks(FPM, Level);
757
758 return FPM;
759}
760
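// Passes that every (Thin)LTO pre-link pipeline must run at the end: aliases
// are canonicalized and anonymous globals are given names so they can be
// referenced and promoted across module boundaries at link time.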
void PassBuilder::addRequiredLTOPreLinkPasses(ModulePassManager &MPM) {
  MPM.addPass(CanonicalizeAliasesPass());
  MPM.addPass(NameAnonGlobalPass());
}
765
766void PassBuilder::addPreInlinerPasses(ModulePassManager &MPM,
767 OptimizationLevel Level,
768 ThinOrFullLTOPhase LTOPhase) {
769 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
770 if (DisablePreInliner)
771 return;
772 InlineParams IP;
773
774 IP.DefaultThreshold = PreInlineThreshold;
775
  // FIXME: The hint threshold has the same value used by the regular inliner
  // when not optimizing for size. This should probably be lowered after
  // performance testing.
  // FIXME: this comment is cargo-culted from the old pass manager; revisit.
780 IP.HintThreshold = Level.isOptimizingForSize() ? PreInlineThreshold : 325;
781 ModuleInlinerWrapperPass MIWP(
782 IP, /* MandatoryFirst */ true,
783 InlineContext{.LTOPhase: LTOPhase, .Pass: InlinePass::EarlyInliner});
784 CGSCCPassManager &CGPipeline = MIWP.getPM();
785
786 FunctionPassManager FPM;
787 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
788 FPM.addPass(Pass: EarlyCSEPass()); // Catch trivial redundancies.
789 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(
790 B: true))); // Merge & remove basic blocks.
791 FPM.addPass(Pass: InstCombinePass()); // Combine silly sequences.
792 invokePeepholeEPCallbacks(FPM, Level);
793
794 CGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
795 Pass: std::move(FPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
796
797 MPM.addPass(Pass: std::move(MIWP));
798
799 // Delete anything that is now dead to make sure that we don't instrument
800 // dead code. Instrumentation can end up keeping dead code around and
801 // dramatically increase code size.
802 MPM.addPass(Pass: GlobalDCEPass());
803}
804
805void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM,
806 OptimizationLevel Level, bool RunProfileGen,
807 bool IsCS, bool AtomicCounterUpdate,
808 std::string ProfileFile,
809 std::string ProfileRemappingFile,
810 IntrusiveRefCntPtr<vfs::FileSystem> FS) {
811 assert(Level != OptimizationLevel::O0 && "Not expecting O0 here!");
812
813 if (!RunProfileGen) {
814 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
815 MPM.addPass(
816 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
817 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
818 // RequireAnalysisPass for PSI before subsequent non-module passes.
819 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
820 return;
821 }
822
823 // Perform PGO instrumentation.
824 MPM.addPass(Pass: PGOInstrumentationGen(IsCS));
825
826 if (EnablePostPGOLoopRotation) {
827 // Disable header duplication in loop rotation at -Oz.
828 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
829 Pass: createFunctionToLoopPassAdaptor(
830 Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
831 Level != OptimizationLevel::Oz),
832 /*UseMemorySSA=*/false,
833 /*UseBlockFrequencyInfo=*/false),
834 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
835 }
836
837 // Add the profile lowering pass.
838 InstrProfOptions Options;
839 if (!ProfileFile.empty())
840 Options.InstrProfileOutput = ProfileFile;
841 // Do counter promotion at Level greater than O0.
842 Options.DoCounterPromotion = true;
843 Options.UseBFIInPromotion = IsCS;
844 Options.Atomic = AtomicCounterUpdate;
845 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
846}
847
848void PassBuilder::addPGOInstrPassesForO0(
849 ModulePassManager &MPM, bool RunProfileGen, bool IsCS,
850 bool AtomicCounterUpdate, std::string ProfileFile,
851 std::string ProfileRemappingFile, IntrusiveRefCntPtr<vfs::FileSystem> FS) {
852 if (!RunProfileGen) {
853 assert(!ProfileFile.empty() && "Profile use expecting a profile file!");
854 MPM.addPass(
855 Pass: PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS, FS));
856 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
857 // RequireAnalysisPass for PSI before subsequent non-module passes.
858 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
859 return;
860 }
861
862 // Perform PGO instrumentation.
863 MPM.addPass(Pass: PGOInstrumentationGen(IsCS));
864 // Add the profile lowering pass.
865 InstrProfOptions Options;
866 if (!ProfileFile.empty())
867 Options.InstrProfileOutput = ProfileFile;
868 // Do not do counter promotion at O0.
869 Options.DoCounterPromotion = false;
870 Options.UseBFIInPromotion = IsCS;
871 Options.Atomic = AtomicCounterUpdate;
872 MPM.addPass(Pass: InstrProfilingLoweringPass(Options, IsCS));
873}
874
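// Map the -O speed and size levels onto the standard inliner cost thresholds.
// When the user sets PTO.InlinerThreshold explicitly, buildInlinerPipeline
// uses that value instead of this mapping.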
static InlineParams getInlineParamsFromOptLevel(OptimizationLevel Level) {
  return getInlineParams(Level.getSpeedupLevel(), Level.getSizeLevel());
}
878
879ModuleInlinerWrapperPass
880PassBuilder::buildInlinerPipeline(OptimizationLevel Level,
881 ThinOrFullLTOPhase Phase) {
882 InlineParams IP;
883 if (PTO.InlinerThreshold == -1)
884 IP = getInlineParamsFromOptLevel(Level);
885 else
886 IP = getInlineParams(Threshold: PTO.InlinerThreshold);
  // For PreLinkThinLTO + SamplePGO, set the hot-caller threshold to 0 to
  // disable hot callsite inlining (as much as possible [1]) because it makes
889 // profile annotation in the backend inaccurate.
890 //
891 // [1] Note the cost of a function could be below zero due to erased
892 // prologue / epilogue.
893 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
894 PGOOpt->Action == PGOOptions::SampleUse)
895 IP.HotCallSiteThreshold = 0;
896
897 if (PGOOpt)
898 IP.EnableDeferral = EnablePGOInlineDeferral;
899
900 ModuleInlinerWrapperPass MIWP(IP, PerformMandatoryInliningsFirst,
901 InlineContext{.LTOPhase: Phase, .Pass: InlinePass::CGSCCInliner},
902 UseInlineAdvisor, MaxDevirtIterations);
903
904 // Require the GlobalsAA analysis for the module so we can query it within
905 // the CGSCC pipeline.
906 if (EnableGlobalAnalyses) {
907 MIWP.addModulePass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
908 // Invalidate AAManager so it can be recreated and pick up the newly
909 // available GlobalsAA.
910 MIWP.addModulePass(
911 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
912 }
913
914 // Require the ProfileSummaryAnalysis for the module so we can query it within
915 // the inliner pass.
916 MIWP.addModulePass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
917
918 // Now begin the main postorder CGSCC pipeline.
  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
  // manager and in trying to emulate its precise behavior. Much of this
  // doesn't make a lot of sense and we should revisit the core CGSCC structure.
922 CGSCCPassManager &MainCGPipeline = MIWP.getPM();
923
924 // Note: historically, the PruneEH pass was run first to deduce nounwind and
925 // generally clean up exception handling overhead. It isn't clear this is
926 // valuable as the inliner doesn't currently care whether it is inlining an
927 // invoke or a call.
928
929 if (AttributorRun & AttributorRunOption::CGSCC)
930 MainCGPipeline.addPass(Pass: AttributorCGSCCPass());
931
932 // Deduce function attributes. We do another run of this after the function
933 // simplification pipeline, so this only needs to run when it could affect the
934 // function simplification pipeline, which is only the case with recursive
935 // functions.
936 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass(/*SkipNonRecursive*/ true));
937
938 // When at O3 add argument promotion to the pass pipeline.
939 // FIXME: It isn't at all clear why this should be limited to O3.
940 if (Level == OptimizationLevel::O3)
941 MainCGPipeline.addPass(Pass: ArgumentPromotionPass());
942
943 // Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
944 // there are no OpenMP runtime calls present in the module.
945 if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
946 MainCGPipeline.addPass(Pass: OpenMPOptCGSCCPass());
947
948 invokeCGSCCOptimizerLateEPCallbacks(CGPM&: MainCGPipeline, Level);
949
950 // Add the core function simplification pipeline nested inside the
951 // CGSCC walk.
952 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
953 Pass: buildFunctionSimplificationPipeline(Level, Phase),
954 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses, /*NoRerun=*/true));
955
956 // Finally, deduce any function attributes based on the fully simplified
957 // function.
958 MainCGPipeline.addPass(Pass: PostOrderFunctionAttrsPass());
959
960 // Mark that the function is fully simplified and that it shouldn't be
961 // simplified again if we somehow revisit it due to CGSCC mutations unless
962 // it's been modified since.
963 MainCGPipeline.addPass(Pass: createCGSCCToFunctionPassAdaptor(
964 Pass: RequireAnalysisPass<ShouldNotRunFunctionPassesAnalysis, Function>()));
965
966 MainCGPipeline.addPass(Pass: CoroSplitPass(Level != OptimizationLevel::O0));
967
968 // Make sure we don't affect potential future NoRerun CGSCC adaptors.
969 MIWP.addLateModulePass(Pass: createModuleToFunctionPassAdaptor(
970 Pass: InvalidateAnalysisPass<ShouldNotRunFunctionPassesAnalysis>()));
971
972 return MIWP;
973}
974
975ModulePassManager
976PassBuilder::buildModuleInlinerPipeline(OptimizationLevel Level,
977 ThinOrFullLTOPhase Phase) {
978 ModulePassManager MPM;
979
980 InlineParams IP = getInlineParamsFromOptLevel(Level);
  // For PreLinkThinLTO + SamplePGO, set the hot-caller threshold to 0 to
  // disable hot callsite inlining (as much as possible [1]) because it makes
983 // profile annotation in the backend inaccurate.
984 //
985 // [1] Note the cost of a function could be below zero due to erased
986 // prologue / epilogue.
987 if (Phase == ThinOrFullLTOPhase::ThinLTOPreLink && PGOOpt &&
988 PGOOpt->Action == PGOOptions::SampleUse)
989 IP.HotCallSiteThreshold = 0;
990
991 if (PGOOpt)
992 IP.EnableDeferral = EnablePGOInlineDeferral;
993
  // The inline deferral logic is used to avoid losing some inlining
  // opportunities in the future. It is helpful in the SCC inliner, where
  // inlining is processed in bottom-up order. In the module inliner, the
  // inlining order is priority-based by default, so inline deferral is
  // unnecessary there and we disable it.
1000 IP.EnableDeferral = false;
1001
1002 MPM.addPass(Pass: ModuleInlinerPass(IP, UseInlineAdvisor, Phase));
1003
1004 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1005 Pass: buildFunctionSimplificationPipeline(Level, Phase),
1006 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1007
1008 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1009 Pass: CoroSplitPass(Level != OptimizationLevel::O0)));
1010
1011 return MPM;
1012}
1013
1014ModulePassManager
1015PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
1016 ThinOrFullLTOPhase Phase) {
1017 assert(Level != OptimizationLevel::O0 &&
1018 "Should not be used for O0 pipeline");
1019
1020 assert(Phase != ThinOrFullLTOPhase::FullLTOPostLink &&
1021 "FullLTOPostLink shouldn't call buildModuleSimplificationPipeline!");
1022
1023 ModulePassManager MPM;
1024
1025 // Place pseudo probe instrumentation as the first pass of the pipeline to
1026 // minimize the impact of optimization changes.
1027 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1028 Phase != ThinOrFullLTOPhase::ThinLTOPostLink)
1029 MPM.addPass(Pass: SampleProfileProbePass(TM));
1030
1031 bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
1032
  // In ThinLTO mode, when a flattened profile is used, all the available
  // profile information will be annotated in the PreLink phase, so there is
  // no need to load the profile again in PostLink.
1036 bool LoadSampleProfile =
1037 HasSampleProfile &&
1038 !(FlattenedProfileUsed && Phase == ThinOrFullLTOPhase::ThinLTOPostLink);
1039
1040 // During the ThinLTO backend phase we perform early indirect call promotion
1041 // here, before globalopt. Otherwise imported available_externally functions
1042 // look unreferenced and are removed. If we are going to load the sample
1043 // profile then defer until later.
1044 // TODO: See if we can move later and consolidate with the location where
1045 // we perform ICP when we are loading a sample profile.
1046 // TODO: We pass HasSampleProfile (whether there was a sample profile file
1047 // passed to the compile) to the SamplePGO flag of ICP. This is used to
1048 // determine whether the new direct calls are annotated with prof metadata.
  // Ideally this should be determined from whether the IR is annotated with
  // sample profile, and not whether a sample profile was provided on the
  // command line. E.g. for flattened profiles where we will not be reloading
1052 // the sample profile in the ThinLTO backend, we ideally shouldn't have to
1053 // provide the sample profile file.
1054 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink && !LoadSampleProfile)
1055 MPM.addPass(Pass: PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
1056
  // Create an early function pass manager to clean up the output of the
1058 // frontend. Not necessary with LTO post link pipelines since the pre link
1059 // pipeline already cleaned up the frontend output.
1060 if (Phase != ThinOrFullLTOPhase::ThinLTOPostLink) {
1061 // Do basic inference of function attributes from known properties of system
1062 // libraries and other oracles.
1063 MPM.addPass(Pass: InferFunctionAttrsPass());
1064 MPM.addPass(Pass: CoroEarlyPass());
1065
1066 FunctionPassManager EarlyFPM;
1067 // Lower llvm.expect to metadata before attempting transforms.
1068 // Compare/branch metadata may alter the behavior of passes like
1069 // SimplifyCFG.
1070 EarlyFPM.addPass(Pass: LowerExpectIntrinsicPass());
1071 EarlyFPM.addPass(Pass: SimplifyCFGPass());
1072 EarlyFPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1073 EarlyFPM.addPass(Pass: EarlyCSEPass());
1074 if (Level == OptimizationLevel::O3)
1075 EarlyFPM.addPass(Pass: CallSiteSplittingPass());
1076 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1077 Pass: std::move(EarlyFPM), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1078 }
1079
1080 if (LoadSampleProfile) {
1081 // Annotate sample profile right after early FPM to ensure freshness of
1082 // the debug info.
1083 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1084 PGOOpt->ProfileRemappingFile, Phase));
1085 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1086 // RequireAnalysisPass for PSI before subsequent non-module passes.
1087 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1088 // Do not invoke ICP in the LTOPrelink phase as it makes it hard
1089 // for the profile annotation to be accurate in the LTO backend.
1090 if (!isLTOPreLink(Phase))
1091 // We perform early indirect call promotion here, before globalopt.
1092 // This is important for the ThinLTO backend phase because otherwise
1093 // imported available_externally functions look unreferenced and are
1094 // removed.
1095 MPM.addPass(
1096 Pass: PGOIndirectCallPromotion(true /* IsInLTO */, true /* SamplePGO */));
1097 }
1098
1099 // Try to perform OpenMP specific optimizations on the module. This is a
1100 // (quick!) no-op if there are no OpenMP runtime calls present in the module.
1101 MPM.addPass(Pass: OpenMPOptPass());
1102
1103 if (AttributorRun & AttributorRunOption::MODULE)
1104 MPM.addPass(Pass: AttributorPass());
1105
1106 // Lower type metadata and the type.test intrinsic in the ThinLTO
1107 // post link pipeline after ICP. This is to enable usage of the type
1108 // tests in ICP sequences.
1109 if (Phase == ThinOrFullLTOPhase::ThinLTOPostLink)
1110 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1111
1112 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
1113
1114 // Interprocedural constant propagation now that basic cleanup has occurred
1115 // and prior to optimizing globals.
1116 // FIXME: This position in the pipeline hasn't been carefully considered in
1117 // years, it should be re-analyzed.
1118 MPM.addPass(Pass: IPSCCPPass(
1119 IPSCCPOptions(/*AllowFuncSpec=*/
1120 Level != OptimizationLevel::Os &&
1121 Level != OptimizationLevel::Oz &&
1122 !isLTOPreLink(Phase))));
1123
1124 // Attach metadata to indirect call sites indicating the set of functions
1125 // they may target at run-time. This should follow IPSCCP.
1126 MPM.addPass(Pass: CalledValuePropagationPass());
1127
1128 // Optimize globals to try and fold them into constants.
1129 MPM.addPass(Pass: GlobalOptPass());
1130
1131 // Create a small function pass pipeline to cleanup after all the global
1132 // optimizations.
1133 FunctionPassManager GlobalCleanupPM;
  // FIXME: Should this instead be a run of SROA?
1135 GlobalCleanupPM.addPass(Pass: PromotePass());
1136 GlobalCleanupPM.addPass(Pass: InstCombinePass());
1137 invokePeepholeEPCallbacks(FPM&: GlobalCleanupPM, Level);
1138 GlobalCleanupPM.addPass(
1139 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1140 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(GlobalCleanupPM),
1141 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1142
1143 // Invoke the pre-inliner passes for instrumentation PGO or MemProf.
1144 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1145 (PGOOpt->Action == PGOOptions::IRInstr ||
1146 PGOOpt->Action == PGOOptions::IRUse || !PGOOpt->MemoryProfile.empty()))
1147 addPreInlinerPasses(MPM, Level, LTOPhase: Phase);
1148
1149 // Add all the requested passes for instrumentation PGO, if requested.
1150 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1151 (PGOOpt->Action == PGOOptions::IRInstr ||
1152 PGOOpt->Action == PGOOptions::IRUse)) {
1153 addPGOInstrPasses(MPM, Level,
1154 /*RunProfileGen=*/PGOOpt->Action == PGOOptions::IRInstr,
1155 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1156 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1157 FS: PGOOpt->FS);
1158 MPM.addPass(Pass: PGOIndirectCallPromotion(false, false));
1159 }
1160 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1161 PGOOpt->CSAction == PGOOptions::CSIRInstr)
1162 MPM.addPass(Pass: PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
1163
1164 if (PGOOpt && Phase != ThinOrFullLTOPhase::ThinLTOPostLink &&
1165 !PGOOpt->MemoryProfile.empty())
1166 MPM.addPass(Pass: MemProfUsePass(PGOOpt->MemoryProfile, PGOOpt->FS));
1167
1168 // Synthesize function entry counts for non-PGO compilation.
1169 if (EnableSyntheticCounts && !PGOOpt)
1170 MPM.addPass(Pass: SyntheticCountsPropagation());
1171
1172 if (EnablePGOForceFunctionAttrs && PGOOpt)
1173 MPM.addPass(Pass: PGOForceFunctionAttrsPass(PGOOpt->ColdOptType));
1174
1175 MPM.addPass(Pass: AlwaysInlinerPass(/*InsertLifetimeIntrinsics=*/true));
1176
1177 if (EnableModuleInliner)
1178 MPM.addPass(Pass: buildModuleInlinerPipeline(Level, Phase));
1179 else
1180 MPM.addPass(Pass: buildInlinerPipeline(Level, Phase));
1181
1182 // Remove any dead arguments exposed by cleanups, constant folding globals,
1183 // and argument promotion.
1184 MPM.addPass(Pass: DeadArgumentEliminationPass());
1185
1186 MPM.addPass(Pass: CoroCleanupPass());
1187
1188 // Optimize globals now that functions are fully simplified.
1189 MPM.addPass(Pass: GlobalOptPass());
1190 MPM.addPass(Pass: GlobalDCEPass());
1191
1192 return MPM;
1193}
1194
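// Populate FPM with the vectorization passes (loop vectorization, unrolling,
// SLP vectorization, VectorCombine) plus the cleanup they need. Used by both
// the regular optimization pipeline (IsFullLTO = false) and the LTO
// optimization pipeline (IsFullLTO = true); the two differ mainly in where
// unrolling and loop-load elimination are scheduled.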
/// TODO: Should LTO cause any differences to this set of passes?
void PassBuilder::addVectorPasses(OptimizationLevel Level,
                                  FunctionPassManager &FPM, bool IsFullLTO) {
  FPM.addPass(LoopVectorizePass(
      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
1200
1201 if (EnableInferAlignmentPass)
1202 FPM.addPass(Pass: InferAlignmentPass());
1203 if (IsFullLTO) {
1204 // The vectorizer may have significantly shortened a loop body; unroll
1205 // again. Unroll small loops to hide loop backedge latency and saturate any
1206 // parallel execution resources of an out-of-order processor. We also then
1207 // need to clean up redundancies and loop invariant code.
1208 // FIXME: It would be really good to use a loop-integrated instruction
1209 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1210 // across the loop nests.
1211 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1212 if (EnableUnrollAndJam && PTO.LoopUnrolling)
1213 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1214 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1215 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1216 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1217 PTO.ForgetAllSCEVInLoopUnroll)));
1218 FPM.addPass(Pass: WarnMissedTransformationsPass());
1219 // Now that we are done with loop unrolling, be it either by LoopVectorizer,
1220 // or LoopUnroll passes, some variable-offset GEP's into alloca's could have
1221 // become constant-offset, thus enabling SROA and alloca promotion. Do so.
    // NOTE: We are very late in the pipeline, and we don't have any LICM
    // or SimplifyCFG passes scheduled after us that would clean up
    // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1225 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1226 }
1227
1228 if (!IsFullLTO) {
1229 // Eliminate loads by forwarding stores from the previous iteration to loads
1230 // of the current iteration.
1231 FPM.addPass(Pass: LoopLoadEliminationPass());
1232 }
1233 // Cleanup after the loop optimization passes.
1234 FPM.addPass(Pass: InstCombinePass());
1235
1236 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1237 ExtraVectorPassManager ExtraPasses;
1238 // At higher optimization levels, try to clean up any runtime overlap and
1239 // alignment checks inserted by the vectorizer. We want to track correlated
1240 // runtime checks for two inner loops in the same outer loop, fold any
1241 // common computations, hoist loop-invariant aspects out of any outer loop,
1242 // and unswitch the runtime checks if possible. Once hoisted, we may have
1243 // dead (or speculatable) control flows or more combining opportunities.
1244 ExtraPasses.addPass(Pass: EarlyCSEPass());
1245 ExtraPasses.addPass(Pass: CorrelatedValuePropagationPass());
1246 ExtraPasses.addPass(Pass: InstCombinePass());
1247 LoopPassManager LPM;
1248 LPM.addPass(Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1249 /*AllowSpeculation=*/true));
1250 LPM.addPass(Pass: SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
1251 OptimizationLevel::O3));
1252 ExtraPasses.addPass(
1253 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM), /*UseMemorySSA=*/true,
1254 /*UseBlockFrequencyInfo=*/true));
1255 ExtraPasses.addPass(
1256 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1257 ExtraPasses.addPass(Pass: InstCombinePass());
1258 FPM.addPass(Pass: std::move(ExtraPasses));
1259 }
1260
  // Now that we've formed fast-to-execute loop structures, we do further
  // optimizations. These are run afterward because they might otherwise block
  // the complex analyses and transforms that loop vectorization needs.
1264
1265 // Cleanup after loop vectorization, etc. Simplification passes like CVP and
1266 // GVN, loop transforms, and others have already run, so it's now better to
1267 // convert to more optimized IR using more aggressive simplify CFG options.
1268 // The extra sinking transform can create larger basic blocks, so do this
1269 // before SLP vectorization.
1270 FPM.addPass(Pass: SimplifyCFGPass(SimplifyCFGOptions()
1271 .forwardSwitchCondToPhi(B: true)
1272 .convertSwitchRangeToICmp(B: true)
1273 .convertSwitchToLookupTable(B: true)
1274 .needCanonicalLoops(B: false)
1275 .hoistCommonInsts(B: true)
1276 .sinkCommonInsts(B: true)));
1277
1278 if (IsFullLTO) {
1279 FPM.addPass(Pass: SCCPPass());
1280 FPM.addPass(Pass: InstCombinePass());
1281 FPM.addPass(Pass: BDCEPass());
1282 }
1283
1284 // Optimize parallel scalar instruction chains into SIMD instructions.
1285 if (PTO.SLPVectorization) {
1286 FPM.addPass(Pass: SLPVectorizerPass());
1287 if (Level.getSpeedupLevel() > 1 && ExtraVectorizerPasses) {
1288 FPM.addPass(Pass: EarlyCSEPass());
1289 }
1290 }
1291 // Enhance/cleanup vector code.
1292 FPM.addPass(Pass: VectorCombinePass());
1293
1294 if (!IsFullLTO) {
1295 FPM.addPass(Pass: InstCombinePass());
1296 // Unroll small loops to hide loop backedge latency and saturate any
1297 // parallel execution resources of an out-of-order processor. We also then
1298 // need to clean up redundancies and loop invariant code.
1299 // FIXME: It would be really good to use a loop-integrated instruction
1300 // combiner for cleanup here so that the unrolling and LICM can be pipelined
1301 // across the loop nests.
1302 // We do UnrollAndJam in a separate LPM to ensure it happens before unroll
1303 if (EnableUnrollAndJam && PTO.LoopUnrolling) {
1304 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1305 Pass: LoopUnrollAndJamPass(Level.getSpeedupLevel())));
1306 }
1307 FPM.addPass(Pass: LoopUnrollPass(LoopUnrollOptions(
1308 Level.getSpeedupLevel(), /*OnlyWhenForced=*/!PTO.LoopUnrolling,
1309 PTO.ForgetAllSCEVInLoopUnroll)));
1310 FPM.addPass(Pass: WarnMissedTransformationsPass());
1311 // Now that we are done with loop unrolling, whether by the LoopVectorizer
1312 // or the LoopUnroll passes, some variable-offset GEPs into allocas could
1313 // have become constant-offset, thus enabling SROA and alloca promotion. Do so.
1314 // NOTE: we are very late in the pipeline, and we don't have any LICM
1315 // or SimplifyCFG passes scheduled after us that would clean up
1316 // the CFG mess this may create if allowed to modify the CFG, so forbid that.
1317 FPM.addPass(Pass: SROAPass(SROAOptions::PreserveCFG));
1318 }
1319
1320 if (EnableInferAlignmentPass)
1321 FPM.addPass(Pass: InferAlignmentPass());
1322 FPM.addPass(Pass: InstCombinePass());
1323
1324 // This is needed for two reasons:
1325 // 1. It works around problems that instcombine introduces, such as sinking
1326 // expensive FP divides into loops containing multiplications using the
1327 // divide result.
1328 // 2. It helps to clean up some loop-invariant code created by the loop
1329 // unroll pass when IsFullLTO=false.
1330 FPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1331 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1332 /*AllowSpeculation=*/true),
1333 /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/false));
1334
1335 // Now that we've vectorized and unrolled loops, we may have more refined
1336 // alignment information, try to re-derive it here.
1337 FPM.addPass(Pass: AlignmentFromAssumptionsPass());
1338}
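// addVectorPasses is shared between the standard per-module optimization
// pipeline (invoked with IsFullLTO=false from buildModuleOptimizationPipeline)
// and the full LTO post-link pipeline (invoked with IsFullLTO=true from
// buildLTODefaultPipeline); the IsFullLTO checks above select between the
// slightly different cleanup sequences the two pipelines need.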
1339
1340ModulePassManager
1341PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
1342 ThinOrFullLTOPhase LTOPhase) {
1343 const bool LTOPreLink = isLTOPreLink(Phase: LTOPhase);
1344 ModulePassManager MPM;
1345
1346 // Run partial inlining pass to partially inline functions that have
1347 // large bodies.
1348 if (RunPartialInlining)
1349 MPM.addPass(Pass: PartialInlinerPass());
1350
1351 // Remove available-externally function and global definitions since we aren't compiling
1352 // an object file for later LTO. For LTO we want to preserve these so they
1353 // are eligible for inlining at link-time. Note if they are unreferenced they
1354 // will be removed by GlobalDCE later, so this only impacts referenced
1355 // available externally globals. Eventually they will be suppressed during
1356 // codegen, but eliminating here enables more opportunity for GlobalDCE as it
1357 // may make globals referenced by available external functions dead and saves
1358 // running remaining passes on the eliminated functions. These should be
1359 // preserved during prelinking for link-time inlining decisions.
1360 if (!LTOPreLink)
1361 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1362
1363 if (EnableOrderFileInstrumentation)
1364 MPM.addPass(Pass: InstrOrderFilePass());
1365
1366 // Do RPO function attribute inference across the module to forward-propagate
1367 // attributes where applicable.
1368 // FIXME: Is this really an optimization rather than a canonicalization?
1369 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1370
1371 // Do a post-inline PGO instrumentation and use pass. This is a context-
1372 // sensitive PGO pass. We don't want to do this in the LTOPreLink phase, as
1373 // cross-module inlining has not been done yet. The context-sensitive
1374 // instrumentation runs after all the inlining is done.
1375 if (!LTOPreLink && PGOOpt) {
1376 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1377 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1378 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1379 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1380 FS: PGOOpt->FS);
1381 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1382 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1383 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1384 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1385 FS: PGOOpt->FS);
1386 }
1387
1388 // Re-compute GlobalsAA here prior to function passes. This is particularly
1389 // useful as the above will have inlined, DCE'ed, and function-attr
1390 // propagated everything. We should at this point have a reasonably minimal
1391 // and richly annotated call graph. By computing aliasing and mod/ref
1392 // information for all local globals here, the late loop passes and notably
1393 // the vectorizer will be able to use them to help recognize vectorizable
1394 // memory operations.
1395 if (EnableGlobalAnalyses)
1396 MPM.addPass(Pass: RecomputeGlobalsAAPass());
1397
1398 invokeOptimizerEarlyEPCallbacks(MPM, Level);
1399
1400 FunctionPassManager OptimizePM;
1401 // Schedule LoopVersioningLICM once inlining is over, because after that we
1402 // may see more accurate aliasing. The reason to run this late is that
1403 // versioning too early may prevent further inlining due to the increase in
1404 // code size. Other optimizations that run later can benefit from the
1405 // no-alias assumption in the cloned loop.
1406 if (UseLoopVersioningLICM) {
1407 OptimizePM.addPass(
1408 Pass: createFunctionToLoopPassAdaptor(Pass: LoopVersioningLICMPass()));
1409 // LoopVersioningLICM pass might increase new LICM opportunities.
1410 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1411 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1412 /*AllowSpeculation=*/true),
1413 /*UseMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
1414 }
1415
1416 OptimizePM.addPass(Pass: Float2IntPass());
1417 OptimizePM.addPass(Pass: LowerConstantIntrinsicsPass());
1418
1419 if (EnableMatrix) {
1420 OptimizePM.addPass(Pass: LowerMatrixIntrinsicsPass());
1421 OptimizePM.addPass(Pass: EarlyCSEPass());
1422 }
1423
1424 // The CHR pass should only be applied when profile information is present;
1425 // the profile summary information is checked inside CHR itself.
1426 if (EnableCHR && Level == OptimizationLevel::O3)
1427 OptimizePM.addPass(Pass: ControlHeightReductionPass());
1428
1429 // FIXME: We need to run some loop optimizations to re-rotate loops after
1430 // simplifycfg and others undo their rotation.
1431
1432 // Optimize the loop execution. These passes operate on entire loop nests
1433 // rather than on each loop in an inside-out manner, and so they are actually
1434 // function passes.
1435
1436 invokeVectorizerStartEPCallbacks(FPM&: OptimizePM, Level);
1437
1438 LoopPassManager LPM;
1439 // First rotate loops that may have been un-rotated by prior passes.
1440 // Disable header duplication at -Oz.
1441 LPM.addPass(Pass: LoopRotatePass(EnableLoopHeaderDuplication ||
1442 Level != OptimizationLevel::Oz,
1443 LTOPreLink));
1444 // Some loops may have become dead by now. Try to delete them.
1445 // FIXME: see discussion in https://reviews.llvm.org/D112851,
1446 // this may need to be revisited once we run GVN before loop deletion
1447 // in the simplification pipeline.
1448 LPM.addPass(Pass: LoopDeletionPass());
1449 OptimizePM.addPass(Pass: createFunctionToLoopPassAdaptor(
1450 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false));
1451
1452 // Distribute loops to allow partial vectorization, i.e. isolate dependences
1453 // into a separate loop that would otherwise inhibit vectorization. This is
1454 // currently only performed for loops marked with the metadata
1455 // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
1456 OptimizePM.addPass(Pass: LoopDistributePass());
1457
1458 // Populates the VFABI attribute with the scalar-to-vector mappings
1459 // from the TargetLibraryInfo.
1460 OptimizePM.addPass(Pass: InjectTLIMappings());
1461
1462 addVectorPasses(Level, FPM&: OptimizePM, /* IsFullLTO */ false);
1463
1464 // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
1465 // canonicalization pass that enables other optimizations. As a result,
1466 // LoopSink needs to be a very late IR pass to avoid undoing the LICM
1467 // results too early.
1468 OptimizePM.addPass(Pass: LoopSinkPass());
1469
1470 // And finally clean up LCSSA form before generating code.
1471 OptimizePM.addPass(Pass: InstSimplifyPass());
1472
1473 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1474 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1475 // flattening of blocks.
1476 OptimizePM.addPass(Pass: DivRemPairsPass());
1477
1478 // Try to annotate calls that were created during optimization.
1479 OptimizePM.addPass(Pass: TailCallElimPass());
1480
1481 // LoopSink (and other loop passes since the last simplifyCFG) might have
1482 // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
1483 OptimizePM.addPass(
1484 Pass: SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(B: true)));
1485
1486 // Add the core optimizing pipeline.
1487 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(OptimizePM),
1488 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1489
1490 invokeOptimizerLastEPCallbacks(MPM, Level);
1491
1492 // Split out cold code. Splitting is done late to avoid hiding context from
1493 // other optimizations and inadvertently regressing performance. The tradeoff
1494 // is that this has a higher code size cost than splitting early.
1495 if (EnableHotColdSplit && !LTOPreLink)
1496 MPM.addPass(Pass: HotColdSplittingPass());
1497
1498 // Search the code for similar regions. If enough similar regions can be
1499 // found where extracting them into their own function will decrease the
1500 // size of the program, we extract the regions and deduplicate the
1501 // structurally similar ones.
1502 if (EnableIROutliner)
1503 MPM.addPass(Pass: IROutlinerPass());
1504
1505 // Merge functions if requested.
1506 if (PTO.MergeFunctions)
1507 MPM.addPass(Pass: MergeFunctionsPass());
1508
1509 // Now we need to do some global optimization transforms.
1510 // FIXME: It would seem like these should come first in the optimization
1511 // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
1512 // ordering here.
1513 MPM.addPass(Pass: GlobalDCEPass());
1514 MPM.addPass(Pass: ConstantMergePass());
1515
1516 if (PTO.CallGraphProfile && !LTOPreLink)
1517 MPM.addPass(Pass: CGProfilePass(LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink ||
1518 LTOPhase == ThinOrFullLTOPhase::ThinLTOPostLink));
1519
1520 // TODO: The relative lookup table converter pass caused an issue when full
1521 // LTO is enabled. See https://reviews.llvm.org/D94355 for more details.
1522 // Until the issue is fixed, disable this pass during the pre-linking phase.
1523 if (!LTOPreLink)
1524 MPM.addPass(Pass: RelLookupTableConverterPass());
1525
1526 return MPM;
1527}
1528
1529ModulePassManager
1530PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
1531 bool LTOPreLink) {
1532 if (Level == OptimizationLevel::O0)
1533 return buildO0DefaultPipeline(Level, LTOPreLink);
1534
1535 ModulePassManager MPM;
1536
1537 // Convert @llvm.global.annotations to !annotation metadata.
1538 MPM.addPass(Pass: Annotation2MetadataPass());
1539
1540 // Force any function attributes we want the rest of the pipeline to observe.
1541 MPM.addPass(Pass: ForceFunctionAttrsPass());
1542
1543 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1544 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1545
1546 // Apply module pipeline start EP callback.
1547 invokePipelineStartEPCallbacks(MPM, Level);
1548
1549 const ThinOrFullLTOPhase LTOPhase = LTOPreLink
1550 ? ThinOrFullLTOPhase::FullLTOPreLink
1551 : ThinOrFullLTOPhase::None;
1552 // Add the core simplification pipeline.
1553 MPM.addPass(Pass: buildModuleSimplificationPipeline(Level, Phase: LTOPhase));
1554
1555 // Now add the optimization pipeline.
1556 MPM.addPass(Pass: buildModuleOptimizationPipeline(Level, LTOPhase));
1557
1558 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1559 PGOOpt->Action == PGOOptions::SampleUse)
1560 MPM.addPass(Pass: PseudoProbeUpdatePass());
1561
1562 // Emit annotation remarks.
1563 addAnnotationRemarksPass(MPM);
1564
1565 if (LTOPreLink)
1566 addRequiredLTOPreLinkPasses(MPM);
1567 return MPM;
1568}
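// Example: a minimal sketch of how a driver typically consumes this pipeline.
// The local names below (PB, LAM, FAM, CGAM, MAM, M) are illustrative and are
// not part of this file:
//
//   PassBuilder PB;
//   LoopAnalysisManager LAM;
//   FunctionAnalysisManager FAM;
//   CGSCCAnalysisManager CGAM;
//   ModuleAnalysisManager MAM;
//   PB.registerModuleAnalyses(MAM);
//   PB.registerCGSCCAnalyses(CGAM);
//   PB.registerFunctionAnalyses(FAM);
//   PB.registerLoopAnalyses(LAM);
//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
//   ModulePassManager MPM =
//       PB.buildPerModuleDefaultPipeline(OptimizationLevel::O2,
//                                        /*LTOPreLink=*/false);
//   MPM.run(M, MAM); // M is the llvm::Module being optimized.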
1569
1570ModulePassManager
1571PassBuilder::buildFatLTODefaultPipeline(OptimizationLevel Level, bool ThinLTO,
1572 bool EmitSummary) {
1573 ModulePassManager MPM;
1574 if (ThinLTO)
1575 MPM.addPass(Pass: buildThinLTOPreLinkDefaultPipeline(Level));
1576 else
1577 MPM.addPass(Pass: buildLTOPreLinkDefaultPipeline(Level));
1578 MPM.addPass(Pass: EmbedBitcodePass(ThinLTO, EmitSummary));
1579
1580 // Use the ThinLTO post-link pipeline with sample profiling
1581 if (ThinLTO && PGOOpt && PGOOpt->Action == PGOOptions::SampleUse)
1582 MPM.addPass(Pass: buildThinLTODefaultPipeline(Level, /*ImportSummary=*/nullptr));
1583 else {
1584 // Otherwise, just use the module optimization pipeline.
1585 MPM.addPass(
1586 Pass: buildModuleOptimizationPipeline(Level, LTOPhase: ThinOrFullLTOPhase::None));
1587 // Emit annotation remarks.
1588 addAnnotationRemarksPass(MPM);
1589 }
1590 return MPM;
1591}
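// The FatLTO pipeline above supports "fat" LTO objects (e.g. clang's
// -ffat-lto-objects): it runs the (Thin)LTO pre-link pipeline, embeds the
// module's bitcode into the object via EmbedBitcodePass, and then finishes
// the non-LTO half of the object with the regular module optimization
// pipeline (or the ThinLTO post-link pipeline when sample profiling is used).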
1592
1593ModulePassManager
1594PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1595 if (Level == OptimizationLevel::O0)
1596 return buildO0DefaultPipeline(Level, /*LTOPreLink*/true);
1597
1598 ModulePassManager MPM;
1599
1600 // Convert @llvm.global.annotations to !annotation metadata.
1601 MPM.addPass(Pass: Annotation2MetadataPass());
1602
1603 // Force any function attributes we want the rest of the pipeline to observe.
1604 MPM.addPass(Pass: ForceFunctionAttrsPass());
1605
1606 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
1607 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
1608
1609 // Apply module pipeline start EP callback.
1610 invokePipelineStartEPCallbacks(MPM, Level);
1611
1612 // If we are planning to perform ThinLTO later, we don't bloat the code with
1613 // unrolling/vectorization/... now. Just simplify the module as much as we
1614 // can.
1615 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1616 Level, Phase: ThinOrFullLTOPhase::ThinLTOPreLink));
1617
1618 // Run partial inlining pass to partially inline functions that have
1619 // large bodies.
1620 // FIXME: It isn't clear whether this is really the right place to run this
1621 // in ThinLTO. Because there is another canonicalization and simplification
1622 // phase that will run after the thin link, running this here ends up with
1623 // less information than will be available later and it may grow functions in
1624 // ways that aren't beneficial.
1625 if (RunPartialInlining)
1626 MPM.addPass(Pass: PartialInlinerPass());
1627
1628 if (PGOOpt && PGOOpt->PseudoProbeForProfiling &&
1629 PGOOpt->Action == PGOOptions::SampleUse)
1630 MPM.addPass(Pass: PseudoProbeUpdatePass());
1631
1632 // Handle Optimizer{Early,Last}EPCallbacks added by clang on PreLink. The
1633 // actual optimization is going to be done in the PostLink stage, but clang
1634 // can't add callbacks there in the case of in-process ThinLTO called by the linker.
1635 invokeOptimizerEarlyEPCallbacks(MPM, Level);
1636 invokeOptimizerLastEPCallbacks(MPM, Level);
1637
1638 // Emit annotation remarks.
1639 addAnnotationRemarksPass(MPM);
1640
1641 addRequiredLTOPreLinkPasses(MPM);
1642
1643 return MPM;
1644}
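// This pre-link pipeline corresponds to the "thinlto-pre-link<O*>" alias
// understood by PassBuilder's textual pipeline parser (e.g.
// opt -passes='thinlto-pre-link<O2>'); buildThinLTODefaultPipeline below
// provides the matching "thinlto<O*>" post-link pipeline.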
1645
1646ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
1647 OptimizationLevel Level, const ModuleSummaryIndex *ImportSummary) {
1648 ModulePassManager MPM;
1649
1650 if (ImportSummary) {
1651 // For ThinLTO we must apply the context disambiguation decisions early, to
1652 // ensure we can correctly match the callsites to summary data.
1653 if (EnableMemProfContextDisambiguation)
1654 MPM.addPass(Pass: MemProfContextDisambiguation(ImportSummary));
1655
1656 // These passes import type identifier resolutions for whole-program
1657 // devirtualization and CFI. They must run early because other passes may
1658 // disturb the specific instruction patterns that these passes look for,
1659 // creating dependencies on resolutions that may not appear in the summary.
1660 //
1661 // For example, GVN may transform the pattern assume(type.test) appearing in
1662 // two basic blocks into assume(phi(type.test, type.test)), which would
1663 // transform a dependency on a WPD resolution into a dependency on a type
1664 // identifier resolution for CFI.
1665 //
1666 // Also, WPD has access to more precise information than ICP and can
1667 // devirtualize more effectively, so it should operate on the IR first.
1668 //
1669 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1670 // metadata and intrinsics.
1671 MPM.addPass(Pass: WholeProgramDevirtPass(nullptr, ImportSummary));
1672 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, ImportSummary));
1673 }
1674
1675 if (Level == OptimizationLevel::O0) {
1676 // Run a second time to clean up any type tests left behind by WPD for use
1677 // in ICP.
1678 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1679 // Drop available_externally and unreferenced globals. This is necessary
1680 // with ThinLTO in order to avoid leaving undefined references to dead
1681 // globals in the object file.
1682 MPM.addPass(Pass: EliminateAvailableExternallyPass());
1683 MPM.addPass(Pass: GlobalDCEPass());
1684 return MPM;
1685 }
1686
1687 // Add the core simplification pipeline.
1688 MPM.addPass(Pass: buildModuleSimplificationPipeline(
1689 Level, Phase: ThinOrFullLTOPhase::ThinLTOPostLink));
1690
1691 // Now add the optimization pipeline.
1692 MPM.addPass(Pass: buildModuleOptimizationPipeline(
1693 Level, LTOPhase: ThinOrFullLTOPhase::ThinLTOPostLink));
1694
1695 // Emit annotation remarks.
1696 addAnnotationRemarksPass(MPM);
1697
1698 return MPM;
1699}
1700
1701ModulePassManager
1702PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
1703 // FIXME: We should use a customized pre-link pipeline!
1704 return buildPerModuleDefaultPipeline(Level,
1705 /* LTOPreLink */ true);
1706}
1707
1708ModulePassManager
1709PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
1710 ModuleSummaryIndex *ExportSummary) {
1711 ModulePassManager MPM;
1712
1713 invokeFullLinkTimeOptimizationEarlyEPCallbacks(MPM, Level);
1714
1715 // Create a function that performs CFI checks for cross-DSO calls with targets
1716 // in the current module.
1717 MPM.addPass(Pass: CrossDSOCFIPass());
1718
1719 if (Level == OptimizationLevel::O0) {
1720 // The WPD and LowerTypeTest passes need to run at -O0 to lower type
1721 // metadata and intrinsics.
1722 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1723 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1724 // Run a second time to clean up any type tests left behind by WPD for use
1725 // in ICP.
1726 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1727
1728 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1729
1730 // Emit annotation remarks.
1731 addAnnotationRemarksPass(MPM);
1732
1733 return MPM;
1734 }
1735
1736 if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
1737 // Load sample profile before running the LTO optimization pipeline.
1738 MPM.addPass(Pass: SampleProfileLoaderPass(PGOOpt->ProfileFile,
1739 PGOOpt->ProfileRemappingFile,
1740 ThinOrFullLTOPhase::FullLTOPostLink));
1741 // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
1742 // RequireAnalysisPass for PSI before subsequent non-module passes.
1743 MPM.addPass(Pass: RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
1744 }
1745
1746 // Try to run OpenMP optimizations; this is a quick no-op if no OpenMP metadata is present.
1747 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1748
1749 // Remove unused virtual tables to improve the quality of code generated by
1750 // whole-program devirtualization and bitset lowering.
1751 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
1752
1753 // Do basic inference of function attributes from known properties of system
1754 // libraries and other oracles.
1755 MPM.addPass(Pass: InferFunctionAttrsPass());
1756
1757 if (Level.getSpeedupLevel() > 1) {
1758 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
1759 Pass: CallSiteSplittingPass(), EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1760
1761 // Indirect call promotion. This should promote all the targets that are
1762 // left by the earlier promotion pass that promotes intra-module targets.
1763 // This two-step promotion saves compile time. For LTO, it should
1764 // produce the same result as if we only did promotion here.
1765 MPM.addPass(Pass: PGOIndirectCallPromotion(
1766 true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
1767
1768 // Propagate constants at call sites into the functions they call. This
1769 // opens opportunities for globalopt (and inlining) by substituting function
1770 // pointers passed as arguments to direct uses of functions.
1771 MPM.addPass(Pass: IPSCCPPass(IPSCCPOptions(/*AllowFuncSpec=*/
1772 Level != OptimizationLevel::Os &&
1773 Level != OptimizationLevel::Oz)));
1774
1775 // Attach metadata to indirect call sites indicating the set of functions
1776 // they may target at run-time. This should follow IPSCCP.
1777 MPM.addPass(Pass: CalledValuePropagationPass());
1778 }
1779
1780 // Now deduce any function attributes based on the current code.
1781 MPM.addPass(
1782 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
1783
1784 // Do RPO function attribute inference across the module to forward-propagate
1785 // attributes where applicable.
1786 // FIXME: Is this really an optimization rather than a canonicalization?
1787 MPM.addPass(Pass: ReversePostOrderFunctionAttrsPass());
1788
1789 // Use in-range annotations on GEP indices to split globals where beneficial.
1790 MPM.addPass(Pass: GlobalSplitPass());
1791
1792 // Run whole-program optimization of virtual calls when the list of callees
1793 // is fixed.
1794 MPM.addPass(Pass: WholeProgramDevirtPass(ExportSummary, nullptr));
1795
1796 // Stop here at -O1.
1797 if (Level == OptimizationLevel::O1) {
1798 // The LowerTypeTestsPass needs to run to lower type metadata and the
1799 // type.test intrinsics. The pass does nothing if CFI is disabled.
1800 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1801 // Run a second time to clean up any type tests left behind by WPD for use
1802 // in ICP (which is performed earlier than this in the regular LTO
1803 // pipeline).
1804 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1805
1806 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
1807
1808 // Emit annotation remarks.
1809 addAnnotationRemarksPass(MPM);
1810
1811 return MPM;
1812 }
1813
1814 // Optimize globals to try and fold them into constants.
1815 MPM.addPass(Pass: GlobalOptPass());
1816
1817 // Promote any localized globals to SSA registers.
1818 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: PromotePass()));
1819
1820 // Linking modules together can lead to duplicate global constants; only
1821 // keep one copy of each constant.
1822 MPM.addPass(Pass: ConstantMergePass());
1823
1824 // Remove unused arguments from functions.
1825 MPM.addPass(Pass: DeadArgumentEliminationPass());
1826
1827 // Reduce the code after globalopt and ipsccp. Both can open up significant
1828 // simplification opportunities, and both can propagate functions through
1829 // function pointers. When this happens, we often have to resolve varargs
1830 // calls, etc, so let instcombine do this.
1831 FunctionPassManager PeepholeFPM;
1832 PeepholeFPM.addPass(Pass: InstCombinePass());
1833 if (Level.getSpeedupLevel() > 1)
1834 PeepholeFPM.addPass(Pass: AggressiveInstCombinePass());
1835 invokePeepholeEPCallbacks(FPM&: PeepholeFPM, Level);
1836
1837 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(PeepholeFPM),
1838 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1839
1840 // Note: historically, the PruneEH pass was run first to deduce nounwind and
1841 // generally clean up exception handling overhead. It isn't clear this is
1842 // valuable as the inliner doesn't currently care whether it is inlining an
1843 // invoke or a call.
1844 // Run the inliner now.
1845 if (EnableModuleInliner) {
1846 MPM.addPass(Pass: ModuleInlinerPass(getInlineParamsFromOptLevel(Level),
1847 UseInlineAdvisor,
1848 ThinOrFullLTOPhase::FullLTOPostLink));
1849 } else {
1850 MPM.addPass(Pass: ModuleInlinerWrapperPass(
1851 getInlineParamsFromOptLevel(Level),
1852 /* MandatoryFirst */ true,
1853 InlineContext{.LTOPhase: ThinOrFullLTOPhase::FullLTOPostLink,
1854 .Pass: InlinePass::CGSCCInliner}));
1855 }
1856
1857 // Perform context disambiguation after inlining, since that would reduce the
1858 // amount of additional cloning required to distinguish the allocation
1859 // contexts.
1860 if (EnableMemProfContextDisambiguation)
1861 MPM.addPass(Pass: MemProfContextDisambiguation());
1862
1863 // Optimize globals again after we ran the inliner.
1864 MPM.addPass(Pass: GlobalOptPass());
1865
1866 // Run the OpenMPOpt pass again after global optimizations.
1867 MPM.addPass(Pass: OpenMPOptPass(ThinOrFullLTOPhase::FullLTOPostLink));
1868
1869 // Garbage collect dead functions.
1870 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
1871
1872 // If we didn't decide to inline a function, check to see if we can
1873 // transform it to pass arguments by value instead of by reference.
1874 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: ArgumentPromotionPass()));
1875
1876 FunctionPassManager FPM;
1877 // The IPO Passes may leave cruft around. Clean up after them.
1878 FPM.addPass(Pass: InstCombinePass());
1879 invokePeepholeEPCallbacks(FPM, Level);
1880
1881 if (EnableConstraintElimination)
1882 FPM.addPass(Pass: ConstraintEliminationPass());
1883
1884 FPM.addPass(Pass: JumpThreadingPass());
1885
1886 // Do a post-inline PGO instrumentation and use pass. This is a
1887 // context-sensitive PGO pass.
1888 if (PGOOpt) {
1889 if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
1890 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/true,
1891 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1892 ProfileFile: PGOOpt->CSProfileGenFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1893 FS: PGOOpt->FS);
1894 else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
1895 addPGOInstrPasses(MPM, Level, /*RunProfileGen=*/false,
1896 /*IsCS=*/true, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate,
1897 ProfileFile: PGOOpt->ProfileFile, ProfileRemappingFile: PGOOpt->ProfileRemappingFile,
1898 FS: PGOOpt->FS);
1899 }
1900
1901 // Break up allocas
1902 FPM.addPass(Pass: SROAPass(SROAOptions::ModifyCFG));
1903
1904 // LTO provides additional opportunities for tail-call elimination due to
1905 // link-time inlining and visibility of the nocapture attribute.
1906 FPM.addPass(Pass: TailCallElimPass());
1907
1908 // Run a few AA-driven optimizations here and now to clean up the code.
1909 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM),
1910 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1911
1912 MPM.addPass(
1913 Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: PostOrderFunctionAttrsPass()));
1914
1915 // Require the GlobalsAA analysis for the module so we can query it within
1916 // MainFPM.
1917 if (EnableGlobalAnalyses) {
1918 MPM.addPass(Pass: RequireAnalysisPass<GlobalsAA, Module>());
1919 // Invalidate AAManager so it can be recreated and pick up the newly
1920 // available GlobalsAA.
1921 MPM.addPass(
1922 Pass: createModuleToFunctionPassAdaptor(Pass: InvalidateAnalysisPass<AAManager>()));
1923 }
1924
1925 FunctionPassManager MainFPM;
1926 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1927 Pass: LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
1928 /*AllowSpeculation=*/true),
1929 /*UseMemorySSA=*/UseMemorySSA: true, /*UseBlockFrequencyInfo=*/false));
1930
1931 if (RunNewGVN)
1932 MainFPM.addPass(Pass: NewGVNPass());
1933 else
1934 MainFPM.addPass(Pass: GVNPass());
1935
1936 // Remove dead memcpy()'s.
1937 MainFPM.addPass(Pass: MemCpyOptPass());
1938
1939 // Nuke dead stores.
1940 MainFPM.addPass(Pass: DSEPass());
1941 MainFPM.addPass(Pass: MoveAutoInitPass());
1942 MainFPM.addPass(Pass: MergedLoadStoreMotionPass());
1943
1944 LoopPassManager LPM;
1945 if (EnableLoopFlatten && Level.getSpeedupLevel() > 1)
1946 LPM.addPass(Pass: LoopFlattenPass());
1947 LPM.addPass(Pass: IndVarSimplifyPass());
1948 LPM.addPass(Pass: LoopDeletionPass());
1949 // FIXME: Add loop interchange.
1950
1951 // Unroll small loops and perform peeling.
1952 LPM.addPass(Pass: LoopFullUnrollPass(Level.getSpeedupLevel(),
1953 /* OnlyWhenForced= */ !PTO.LoopUnrolling,
1954 PTO.ForgetAllSCEVInLoopUnroll));
1955 // The loop passes in LPM (LoopFullUnrollPass) do not preserve MemorySSA.
1956 // *All* loop passes must preserve it, in order to be able to use it.
1957 MainFPM.addPass(Pass: createFunctionToLoopPassAdaptor(
1958 Pass: std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/true));
1959
1960 MainFPM.addPass(Pass: LoopDistributePass());
1961
1962 addVectorPasses(Level, FPM&: MainFPM, /* IsFullLTO */ true);
1963
1964 // Run the OpenMPOpt CGSCC pass again late.
1965 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(
1966 Pass: OpenMPOptCGSCCPass(ThinOrFullLTOPhase::FullLTOPostLink)));
1967
1968 invokePeepholeEPCallbacks(FPM&: MainFPM, Level);
1969 MainFPM.addPass(Pass: JumpThreadingPass());
1970 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(MainFPM),
1971 EagerlyInvalidate: PTO.EagerlyInvalidateAnalyses));
1972
1973 // Lower type metadata and the type.test intrinsic. This pass supports
1974 // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
1975 // to be run at link time if CFI is enabled. This pass does nothing if
1976 // CFI is disabled.
1977 MPM.addPass(Pass: LowerTypeTestsPass(ExportSummary, nullptr));
1978 // Run a second time to clean up any type tests left behind by WPD for use
1979 // in ICP (which is performed earlier than this in the regular LTO pipeline).
1980 MPM.addPass(Pass: LowerTypeTestsPass(nullptr, nullptr, true));
1981
1982 // Enable splitting late in the FullLTO post-link pipeline.
1983 if (EnableHotColdSplit)
1984 MPM.addPass(Pass: HotColdSplittingPass());
1985
1986 // Add late LTO optimization passes.
1987 FunctionPassManager LateFPM;
1988
1989 // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
1990 // canonicalization pass that enables other optimizations. As a result,
1991 // LoopSink needs to be a very late IR pass to avoid undoing the LICM
1992 // results too early.
1993 LateFPM.addPass(Pass: LoopSinkPass());
1994
1995 // This hoists/decomposes div/rem ops. It should run after other sink/hoist
1996 // passes to avoid re-sinking, but before SimplifyCFG because it can allow
1997 // flattening of blocks.
1998 LateFPM.addPass(Pass: DivRemPairsPass());
1999
2000 // Delete basic blocks, which optimization passes may have killed.
2001 LateFPM.addPass(Pass: SimplifyCFGPass(
2002 SimplifyCFGOptions().convertSwitchRangeToICmp(B: true).hoistCommonInsts(
2003 B: true)));
2004 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(LateFPM)));
2005
2006 // Drop bodies of available externally objects to improve GlobalDCE.
2007 MPM.addPass(Pass: EliminateAvailableExternallyPass());
2008
2009 // Now that we have optimized the program, discard unreachable functions.
2010 MPM.addPass(Pass: GlobalDCEPass(/*InLTOPostLink=*/true));
2011
2012 if (PTO.MergeFunctions)
2013 MPM.addPass(Pass: MergeFunctionsPass());
2014
2015 if (PTO.CallGraphProfile)
2016 MPM.addPass(Pass: CGProfilePass(/*InLTOPostLink=*/true));
2017
2018 invokeFullLinkTimeOptimizationLastEPCallbacks(MPM, Level);
2019
2020 // Emit annotation remarks.
2021 addAnnotationRemarksPass(MPM);
2022
2023 return MPM;
2024}
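// This is the full LTO post-link pipeline that LTO backends run over the
// merged module; it is also reachable via the "lto<O*>" textual pipeline
// alias, with "lto-pre-link<O*>" mapping to buildLTOPreLinkDefaultPipeline
// above.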
2025
2026ModulePassManager PassBuilder::buildO0DefaultPipeline(OptimizationLevel Level,
2027 bool LTOPreLink) {
2028 assert(Level == OptimizationLevel::O0 &&
2029 "buildO0DefaultPipeline should only be used with O0");
2030
2031 ModulePassManager MPM;
2032
2033 // Perform pseudo probe instrumentation in O0 mode. This is for consistency
2034 // between different build modes. For example, an LTO build can be mixed
2035 // with an O0 prelink and an O2 postlink. Loading a sample profile in
2036 // the postlink will require pseudo probe instrumentation in the prelink.
2037 if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
2038 MPM.addPass(Pass: SampleProfileProbePass(TM));
2039
2040 if (PGOOpt && (PGOOpt->Action == PGOOptions::IRInstr ||
2041 PGOOpt->Action == PGOOptions::IRUse))
2042 addPGOInstrPassesForO0(
2043 MPM,
2044 /*RunProfileGen=*/(PGOOpt->Action == PGOOptions::IRInstr),
2045 /*IsCS=*/false, AtomicCounterUpdate: PGOOpt->AtomicCounterUpdate, ProfileFile: PGOOpt->ProfileFile,
2046 ProfileRemappingFile: PGOOpt->ProfileRemappingFile, FS: PGOOpt->FS);
2047
2048 invokePipelineStartEPCallbacks(MPM, Level);
2049
2050 if (PGOOpt && PGOOpt->DebugInfoForProfiling)
2051 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AddDiscriminatorsPass()));
2052
2053 invokePipelineEarlySimplificationEPCallbacks(MPM, Level);
2054
2055 // Build a minimal pipeline based on the semantics required by LLVM,
2056 // which is just that always inlining occurs. Further, disable generating
2057 // lifetime intrinsics to avoid enabling further optimizations during
2058 // code generation.
2059 MPM.addPass(Pass: AlwaysInlinerPass(
2060 /*InsertLifetimeIntrinsics=*/false));
2061
2062 if (PTO.MergeFunctions)
2063 MPM.addPass(Pass: MergeFunctionsPass());
2064
2065 if (EnableMatrix)
2066 MPM.addPass(
2067 Pass: createModuleToFunctionPassAdaptor(Pass: LowerMatrixIntrinsicsPass(true)));
2068
2069 if (!CGSCCOptimizerLateEPCallbacks.empty()) {
2070 CGSCCPassManager CGPM;
2071 invokeCGSCCOptimizerLateEPCallbacks(CGPM, Level);
2072 if (!CGPM.isEmpty())
2073 MPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2074 }
2075 if (!LateLoopOptimizationsEPCallbacks.empty()) {
2076 LoopPassManager LPM;
2077 invokeLateLoopOptimizationsEPCallbacks(LPM, Level);
2078 if (!LPM.isEmpty()) {
2079 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2080 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2081 }
2082 }
2083 if (!LoopOptimizerEndEPCallbacks.empty()) {
2084 LoopPassManager LPM;
2085 invokeLoopOptimizerEndEPCallbacks(LPM, Level);
2086 if (!LPM.isEmpty()) {
2087 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(
2088 Pass: createFunctionToLoopPassAdaptor(Pass: std::move(LPM))));
2089 }
2090 }
2091 if (!ScalarOptimizerLateEPCallbacks.empty()) {
2092 FunctionPassManager FPM;
2093 invokeScalarOptimizerLateEPCallbacks(FPM, Level);
2094 if (!FPM.isEmpty())
2095 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2096 }
2097
2098 invokeOptimizerEarlyEPCallbacks(MPM, Level);
2099
2100 if (!VectorizerStartEPCallbacks.empty()) {
2101 FunctionPassManager FPM;
2102 invokeVectorizerStartEPCallbacks(FPM, Level);
2103 if (!FPM.isEmpty())
2104 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: std::move(FPM)));
2105 }
2106
2107 ModulePassManager CoroPM;
2108 CoroPM.addPass(Pass: CoroEarlyPass());
2109 CGSCCPassManager CGPM;
2110 CGPM.addPass(Pass: CoroSplitPass());
2111 CoroPM.addPass(Pass: createModuleToPostOrderCGSCCPassAdaptor(Pass: std::move(CGPM)));
2112 CoroPM.addPass(Pass: CoroCleanupPass());
2113 CoroPM.addPass(Pass: GlobalDCEPass());
2114 MPM.addPass(Pass: CoroConditionalWrapper(std::move(CoroPM)));
2115
2116 invokeOptimizerLastEPCallbacks(MPM, Level);
2117
2118 if (LTOPreLink)
2119 addRequiredLTOPreLinkPasses(MPM);
2120
2121 MPM.addPass(Pass: createModuleToFunctionPassAdaptor(Pass: AnnotationRemarksPass()));
2122
2123 return MPM;
2124}
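// Even at -O0 this pipeline is not empty: always-inlining and coroutine
// lowering (via CoroConditionalWrapper) are required for correctness, and the
// extension-point callbacks still run so that plugins and sanitizers can add
// their passes. Textually, this is essentially what "default<O0>" expands to.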
2125
2126AAManager PassBuilder::buildDefaultAAPipeline() {
2127 AAManager AA;
2128
2129 // The order in which these are registered determines their priority when
2130 // being queried.
2131
2132 // First we register the basic alias analysis that provides the majority of
2133 // per-function local AA logic. This is a stateless, on-demand local set of
2134 // AA techniques.
2135 AA.registerFunctionAnalysis<BasicAA>();
2136
2137 // Next we query fast, specialized alias analyses that wrap IR-embedded
2138 // information about aliasing.
2139 AA.registerFunctionAnalysis<ScopedNoAliasAA>();
2140 AA.registerFunctionAnalysis<TypeBasedAA>();
2141
2142 // Add support for querying global aliasing information when available.
2143 // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
2144 // analysis, all that the `AAManager` can do is query for any *cached*
2145 // results from `GlobalsAA` through a readonly proxy.
2146 if (EnableGlobalAnalyses)
2147 AA.registerModuleAnalysis<GlobalsAA>();
2148
2149 // Add target-specific alias analyses.
2150 if (TM)
2151 TM->registerDefaultAliasAnalyses(AA);
2152
2153 return AA;
2154}
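// Example: a hedged sketch of installing this AA pipeline into a
// FunctionAnalysisManager explicitly (useful when customizing the AA stack);
// `PB` and `FAM` are illustrative local names, not part of this file:
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });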
2155
