1 | //===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Peephole optimize the CFG. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/ADT/APInt.h" |
14 | #include "llvm/ADT/ArrayRef.h" |
15 | #include "llvm/ADT/DenseMap.h" |
16 | #include "llvm/ADT/MapVector.h" |
17 | #include "llvm/ADT/STLExtras.h" |
18 | #include "llvm/ADT/Sequence.h" |
19 | #include "llvm/ADT/SetOperations.h" |
20 | #include "llvm/ADT/SetVector.h" |
21 | #include "llvm/ADT/SmallPtrSet.h" |
22 | #include "llvm/ADT/SmallVector.h" |
23 | #include "llvm/ADT/Statistic.h" |
24 | #include "llvm/ADT/StringRef.h" |
25 | #include "llvm/Analysis/AssumptionCache.h" |
26 | #include "llvm/Analysis/CaptureTracking.h" |
27 | #include "llvm/Analysis/ConstantFolding.h" |
28 | #include "llvm/Analysis/DomTreeUpdater.h" |
29 | #include "llvm/Analysis/GuardUtils.h" |
30 | #include "llvm/Analysis/InstructionSimplify.h" |
31 | #include "llvm/Analysis/MemorySSA.h" |
32 | #include "llvm/Analysis/MemorySSAUpdater.h" |
33 | #include "llvm/Analysis/TargetTransformInfo.h" |
34 | #include "llvm/Analysis/ValueTracking.h" |
35 | #include "llvm/IR/Attributes.h" |
36 | #include "llvm/IR/BasicBlock.h" |
37 | #include "llvm/IR/CFG.h" |
38 | #include "llvm/IR/Constant.h" |
39 | #include "llvm/IR/ConstantRange.h" |
40 | #include "llvm/IR/Constants.h" |
41 | #include "llvm/IR/DataLayout.h" |
42 | #include "llvm/IR/DebugInfo.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/GlobalValue.h" |
46 | #include "llvm/IR/GlobalVariable.h" |
47 | #include "llvm/IR/IRBuilder.h" |
48 | #include "llvm/IR/InstrTypes.h" |
49 | #include "llvm/IR/Instruction.h" |
50 | #include "llvm/IR/Instructions.h" |
51 | #include "llvm/IR/IntrinsicInst.h" |
52 | #include "llvm/IR/LLVMContext.h" |
53 | #include "llvm/IR/MDBuilder.h" |
54 | #include "llvm/IR/MemoryModelRelaxationAnnotations.h" |
55 | #include "llvm/IR/Metadata.h" |
56 | #include "llvm/IR/Module.h" |
57 | #include "llvm/IR/NoFolder.h" |
58 | #include "llvm/IR/Operator.h" |
59 | #include "llvm/IR/PatternMatch.h" |
60 | #include "llvm/IR/ProfDataUtils.h" |
61 | #include "llvm/IR/Type.h" |
62 | #include "llvm/IR/Use.h" |
63 | #include "llvm/IR/User.h" |
64 | #include "llvm/IR/Value.h" |
65 | #include "llvm/IR/ValueHandle.h" |
66 | #include "llvm/Support/BranchProbability.h" |
67 | #include "llvm/Support/Casting.h" |
68 | #include "llvm/Support/CommandLine.h" |
69 | #include "llvm/Support/Debug.h" |
70 | #include "llvm/Support/ErrorHandling.h" |
71 | #include "llvm/Support/KnownBits.h" |
72 | #include "llvm/Support/MathExtras.h" |
73 | #include "llvm/Support/raw_ostream.h" |
74 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
75 | #include "llvm/Transforms/Utils/Local.h" |
76 | #include "llvm/Transforms/Utils/ValueMapper.h" |
77 | #include <algorithm> |
78 | #include <cassert> |
79 | #include <climits> |
80 | #include <cstddef> |
81 | #include <cstdint> |
82 | #include <iterator> |
83 | #include <map> |
84 | #include <optional> |
85 | #include <set> |
86 | #include <tuple> |
87 | #include <utility> |
88 | #include <vector> |
89 | |
90 | using namespace llvm; |
91 | using namespace PatternMatch; |
92 | |
93 | #define DEBUG_TYPE "simplifycfg" |
94 | |
95 | cl::opt<bool> llvm::RequireAndPreserveDomTree( |
96 | "simplifycfg-require-and-preserve-domtree" , cl::Hidden, |
97 | |
98 | cl::desc("Temorary development switch used to gradually uplift SimplifyCFG " |
99 | "into preserving DomTree," )); |
100 | |
101 | // Chosen as 2 so as to be cheap, but still to have enough power to fold |
102 | // a select, so the "clamp" idiom (of a min followed by a max) will be caught. |
103 | // To catch this, we need to fold a compare and a select, hence '2' being the |
104 | // minimum reasonable default. |
105 | static cl::opt<unsigned> PHINodeFoldingThreshold( |
106 | "phi-node-folding-threshold" , cl::Hidden, cl::init(Val: 2), |
107 | cl::desc( |
108 | "Control the amount of phi node folding to perform (default = 2)" )); |
109 | |
110 | static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold( |
111 | "two-entry-phi-node-folding-threshold" , cl::Hidden, cl::init(Val: 4), |
112 | cl::desc("Control the maximal total instruction cost that we are willing " |
113 | "to speculatively execute to fold a 2-entry PHI node into a " |
114 | "select (default = 4)" )); |
115 | |
116 | static cl::opt<bool> |
117 | HoistCommon("simplifycfg-hoist-common" , cl::Hidden, cl::init(Val: true), |
118 | cl::desc("Hoist common instructions up to the parent block" )); |
119 | |
120 | static cl::opt<unsigned> |
121 | HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit" , cl::Hidden, |
122 | cl::init(Val: 20), |
123 | cl::desc("Allow reordering across at most this many " |
124 | "instructions when hoisting" )); |
125 | |
126 | static cl::opt<bool> |
127 | SinkCommon("simplifycfg-sink-common" , cl::Hidden, cl::init(Val: true), |
128 | cl::desc("Sink common instructions down to the end block" )); |
129 | |
130 | static cl::opt<bool> HoistCondStores( |
131 | "simplifycfg-hoist-cond-stores" , cl::Hidden, cl::init(Val: true), |
132 | cl::desc("Hoist conditional stores if an unconditional store precedes" )); |
133 | |
134 | static cl::opt<bool> MergeCondStores( |
135 | "simplifycfg-merge-cond-stores" , cl::Hidden, cl::init(Val: true), |
136 | cl::desc("Hoist conditional stores even if an unconditional store does not " |
137 | "precede - hoist multiple conditional stores into a single " |
138 | "predicated store" )); |
139 | |
140 | static cl::opt<bool> MergeCondStoresAggressively( |
141 | "simplifycfg-merge-cond-stores-aggressively" , cl::Hidden, cl::init(Val: false), |
142 | cl::desc("When merging conditional stores, do so even if the resultant " |
143 | "basic blocks are unlikely to be if-converted as a result" )); |
144 | |
145 | static cl::opt<bool> SpeculateOneExpensiveInst( |
146 | "speculate-one-expensive-inst" , cl::Hidden, cl::init(Val: true), |
147 | cl::desc("Allow exactly one expensive instruction to be speculatively " |
148 | "executed" )); |
149 | |
150 | static cl::opt<unsigned> MaxSpeculationDepth( |
151 | "max-speculation-depth" , cl::Hidden, cl::init(Val: 10), |
152 | cl::desc("Limit maximum recursion depth when calculating costs of " |
153 | "speculatively executed instructions" )); |
154 | |
155 | static cl::opt<int> |
156 | MaxSmallBlockSize("simplifycfg-max-small-block-size" , cl::Hidden, |
157 | cl::init(Val: 10), |
158 | cl::desc("Max size of a block which is still considered " |
159 | "small enough to thread through" )); |
160 | |
161 | // Two is chosen to allow one negation and a logical combine. |
162 | static cl::opt<unsigned> |
163 | BranchFoldThreshold("simplifycfg-branch-fold-threshold" , cl::Hidden, |
164 | cl::init(Val: 2), |
165 | cl::desc("Maximum cost of combining conditions when " |
166 | "folding branches" )); |
167 | |
168 | static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier( |
169 | "simplifycfg-branch-fold-common-dest-vector-multiplier" , cl::Hidden, |
170 | cl::init(Val: 2), |
171 | cl::desc("Multiplier to apply to threshold when determining whether or not " |
172 | "to fold branch to common destination when vector operations are " |
173 | "present" )); |
174 | |
175 | static cl::opt<bool> EnableMergeCompatibleInvokes( |
176 | "simplifycfg-merge-compatible-invokes" , cl::Hidden, cl::init(Val: true), |
177 | cl::desc("Allow SimplifyCFG to merge invokes together when appropriate" )); |
178 | |
179 | static cl::opt<unsigned> MaxSwitchCasesPerResult( |
180 | "max-switch-cases-per-result" , cl::Hidden, cl::init(Val: 16), |
181 | cl::desc("Limit cases to analyze when converting a switch to select" )); |
182 | |
// Pass statistics, reported with -stats. Each counter tracks how many times
// the corresponding SimplifyCFG transform fired across the compilation.
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps" );
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping" );
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables" );
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)" );
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares" );
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks" );
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block" );
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block" );
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block" );
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block" );
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block" );
STATISTIC(NumSpeculations, "Number of speculative executed instructions" );
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls" );
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together" );
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed" );
210 | |
211 | namespace { |
212 | |
// The first field contains the value that the switch produces when a certain
// case group is selected, and the second field is a vector containing the
// cases composing the case group. Used when converting a switch into one or
// more selects over grouped case values.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// The first field contains the phi node that generates a result of the switch
// and the second field contains the value generated for a certain case in the
// switch for that PHI.
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
223 | |
224 | /// ValueEqualityComparisonCase - Represents a case of a switch. |
225 | struct ValueEqualityComparisonCase { |
226 | ConstantInt *Value; |
227 | BasicBlock *Dest; |
228 | |
229 | ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest) |
230 | : Value(Value), Dest(Dest) {} |
231 | |
232 | bool operator<(ValueEqualityComparisonCase RHS) const { |
233 | // Comparing pointers is ok as we only rely on the order for uniquing. |
234 | return Value < RHS.Value; |
235 | } |
236 | |
237 | bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; } |
238 | }; |
239 | |
240 | class SimplifyCFGOpt { |
241 | const TargetTransformInfo &TTI; |
242 | DomTreeUpdater *DTU; |
243 | const DataLayout &DL; |
244 | ArrayRef<WeakVH> ; |
245 | const SimplifyCFGOptions &Options; |
246 | bool Resimplify; |
247 | |
248 | Value *isValueEqualityComparison(Instruction *TI); |
249 | BasicBlock *GetValueEqualityComparisonCases( |
250 | Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases); |
251 | bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI, |
252 | BasicBlock *Pred, |
253 | IRBuilder<> &Builder); |
254 | bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV, |
255 | Instruction *PTI, |
256 | IRBuilder<> &Builder); |
257 | bool FoldValueComparisonIntoPredecessors(Instruction *TI, |
258 | IRBuilder<> &Builder); |
259 | |
260 | bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder); |
261 | bool simplifySingleResume(ResumeInst *RI); |
262 | bool simplifyCommonResume(ResumeInst *RI); |
263 | bool simplifyCleanupReturn(CleanupReturnInst *RI); |
264 | bool simplifyUnreachable(UnreachableInst *UI); |
265 | bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder); |
266 | bool simplifyIndirectBr(IndirectBrInst *IBI); |
267 | bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder); |
268 | bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder); |
269 | bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder); |
270 | |
271 | bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI, |
272 | IRBuilder<> &Builder); |
273 | |
274 | bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly); |
275 | bool hoistSuccIdenticalTerminatorToSwitchOrIf( |
276 | Instruction *TI, Instruction *I1, |
277 | SmallVectorImpl<Instruction *> &OtherSuccTIs); |
278 | bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB); |
279 | bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond, |
280 | BasicBlock *TrueBB, BasicBlock *FalseBB, |
281 | uint32_t TrueWeight, uint32_t FalseWeight); |
282 | bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder, |
283 | const DataLayout &DL); |
284 | bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select); |
285 | bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI); |
286 | bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder); |
287 | |
288 | public: |
289 | SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU, |
290 | const DataLayout &DL, ArrayRef<WeakVH> , |
291 | const SimplifyCFGOptions &Opts) |
292 | : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) { |
293 | assert((!DTU || !DTU->hasPostDomTree()) && |
294 | "SimplifyCFG is not yet capable of maintaining validity of a " |
295 | "PostDomTree, so don't ask for it." ); |
296 | } |
297 | |
298 | bool simplifyOnce(BasicBlock *BB); |
299 | bool run(BasicBlock *BB); |
300 | |
301 | // Helper to set Resimplify and return change indication. |
302 | bool requestResimplify() { |
303 | Resimplify = true; |
304 | return true; |
305 | } |
306 | }; |
307 | |
308 | } // end anonymous namespace |
309 | |
310 | /// Return true if all the PHI nodes in the basic block \p BB |
311 | /// receive compatible (identical) incoming values when coming from |
312 | /// all of the predecessor blocks that are specified in \p IncomingBlocks. |
313 | /// |
314 | /// Note that if the values aren't exactly identical, but \p EquivalenceSet |
315 | /// is provided, and *both* of the values are present in the set, |
316 | /// then they are considered equal. |
317 | static bool IncomingValuesAreCompatible( |
318 | BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks, |
319 | SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) { |
320 | assert(IncomingBlocks.size() == 2 && |
321 | "Only for a pair of incoming blocks at the time!" ); |
322 | |
323 | // FIXME: it is okay if one of the incoming values is an `undef` value, |
324 | // iff the other incoming value is guaranteed to be a non-poison value. |
325 | // FIXME: it is okay if one of the incoming values is a `poison` value. |
326 | return all_of(Range: BB->phis(), P: [IncomingBlocks, EquivalenceSet](PHINode &PN) { |
327 | Value *IV0 = PN.getIncomingValueForBlock(BB: IncomingBlocks[0]); |
328 | Value *IV1 = PN.getIncomingValueForBlock(BB: IncomingBlocks[1]); |
329 | if (IV0 == IV1) |
330 | return true; |
331 | if (EquivalenceSet && EquivalenceSet->contains(Ptr: IV0) && |
332 | EquivalenceSet->contains(Ptr: IV1)) |
333 | return true; |
334 | return false; |
335 | }); |
336 | } |
337 | |
338 | /// Return true if it is safe to merge these two |
339 | /// terminator instructions together. |
340 | static bool |
341 | SafeToMergeTerminators(Instruction *SI1, Instruction *SI2, |
342 | SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) { |
343 | if (SI1 == SI2) |
344 | return false; // Can't merge with self! |
345 | |
346 | // It is not safe to merge these two switch instructions if they have a common |
347 | // successor, and if that successor has a PHI node, and if *that* PHI node has |
348 | // conflicting incoming values from the two switch blocks. |
349 | BasicBlock *SI1BB = SI1->getParent(); |
350 | BasicBlock *SI2BB = SI2->getParent(); |
351 | |
352 | SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(BB: SI1BB), succ_end(BB: SI1BB)); |
353 | bool Fail = false; |
354 | for (BasicBlock *Succ : successors(BB: SI2BB)) { |
355 | if (!SI1Succs.count(Ptr: Succ)) |
356 | continue; |
357 | if (IncomingValuesAreCompatible(BB: Succ, IncomingBlocks: {SI1BB, SI2BB})) |
358 | continue; |
359 | Fail = true; |
360 | if (FailBlocks) |
361 | FailBlocks->insert(X: Succ); |
362 | else |
363 | break; |
364 | } |
365 | |
366 | return !Fail; |
367 | } |
368 | |
369 | /// Update PHI nodes in Succ to indicate that there will now be entries in it |
370 | /// from the 'NewPred' block. The values that will be flowing into the PHI nodes |
371 | /// will be the same as those coming in from ExistPred, an existing predecessor |
372 | /// of Succ. |
373 | static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, |
374 | BasicBlock *ExistPred, |
375 | MemorySSAUpdater *MSSAU = nullptr) { |
376 | for (PHINode &PN : Succ->phis()) |
377 | PN.addIncoming(V: PN.getIncomingValueForBlock(BB: ExistPred), BB: NewPred); |
378 | if (MSSAU) |
379 | if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(BB: Succ)) |
380 | MPhi->addIncoming(V: MPhi->getIncomingValueForBlock(BB: ExistPred), BB: NewPred); |
381 | } |
382 | |
383 | /// Compute an abstract "cost" of speculating the given instruction, |
384 | /// which is assumed to be safe to speculate. TCC_Free means cheap, |
385 | /// TCC_Basic means less cheap, and TCC_Expensive means prohibitively |
386 | /// expensive. |
387 | static InstructionCost computeSpeculationCost(const User *I, |
388 | const TargetTransformInfo &TTI) { |
389 | assert((!isa<Instruction>(I) || |
390 | isSafeToSpeculativelyExecute(cast<Instruction>(I))) && |
391 | "Instruction is not safe to speculatively execute!" ); |
392 | return TTI.getInstructionCost(U: I, CostKind: TargetTransformInfo::TCK_SizeAndLatency); |
393 | } |
394 | |
395 | /// If we have a merge point of an "if condition" as accepted above, |
396 | /// return true if the specified value dominates the block. We |
397 | /// don't handle the true generality of domination here, just a special case |
398 | /// which works well enough for us. |
399 | /// |
400 | /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to |
401 | /// see if V (which must be an instruction) and its recursive operands |
402 | /// that do not dominate BB have a combined cost lower than Budget and |
403 | /// are non-trapping. If both are true, the instruction is inserted into the |
404 | /// set and true is returned. |
405 | /// |
406 | /// The cost for most non-trapping instructions is defined as 1 except for |
407 | /// Select whose cost is 2. |
408 | /// |
409 | /// After this function returns, Cost is increased by the cost of |
410 | /// V plus its non-dominating operands. If that cost is greater than |
411 | /// Budget, false is returned and Cost is undefined. |
412 | static bool dominatesMergePoint(Value *V, BasicBlock *BB, |
413 | SmallPtrSetImpl<Instruction *> &AggressiveInsts, |
414 | InstructionCost &Cost, |
415 | InstructionCost Budget, |
416 | const TargetTransformInfo &TTI, |
417 | unsigned Depth = 0) { |
418 | // It is possible to hit a zero-cost cycle (phi/gep instructions for example), |
419 | // so limit the recursion depth. |
420 | // TODO: While this recursion limit does prevent pathological behavior, it |
421 | // would be better to track visited instructions to avoid cycles. |
422 | if (Depth == MaxSpeculationDepth) |
423 | return false; |
424 | |
425 | Instruction *I = dyn_cast<Instruction>(Val: V); |
426 | if (!I) { |
427 | // Non-instructions dominate all instructions and can be executed |
428 | // unconditionally. |
429 | return true; |
430 | } |
431 | BasicBlock *PBB = I->getParent(); |
432 | |
433 | // We don't want to allow weird loops that might have the "if condition" in |
434 | // the bottom of this block. |
435 | if (PBB == BB) |
436 | return false; |
437 | |
438 | // If this instruction is defined in a block that contains an unconditional |
439 | // branch to BB, then it must be in the 'conditional' part of the "if |
440 | // statement". If not, it definitely dominates the region. |
441 | BranchInst *BI = dyn_cast<BranchInst>(Val: PBB->getTerminator()); |
442 | if (!BI || BI->isConditional() || BI->getSuccessor(i: 0) != BB) |
443 | return true; |
444 | |
445 | // If we have seen this instruction before, don't count it again. |
446 | if (AggressiveInsts.count(Ptr: I)) |
447 | return true; |
448 | |
449 | // Okay, it looks like the instruction IS in the "condition". Check to |
450 | // see if it's a cheap instruction to unconditionally compute, and if it |
451 | // only uses stuff defined outside of the condition. If so, hoist it out. |
452 | if (!isSafeToSpeculativelyExecute(I)) |
453 | return false; |
454 | |
455 | Cost += computeSpeculationCost(I, TTI); |
456 | |
457 | // Allow exactly one instruction to be speculated regardless of its cost |
458 | // (as long as it is safe to do so). |
459 | // This is intended to flatten the CFG even if the instruction is a division |
460 | // or other expensive operation. The speculation of an expensive instruction |
461 | // is expected to be undone in CodeGenPrepare if the speculation has not |
462 | // enabled further IR optimizations. |
463 | if (Cost > Budget && |
464 | (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 || |
465 | !Cost.isValid())) |
466 | return false; |
467 | |
468 | // Okay, we can only really hoist these out if their operands do |
469 | // not take us over the cost threshold. |
470 | for (Use &Op : I->operands()) |
471 | if (!dominatesMergePoint(V: Op, BB, AggressiveInsts, Cost, Budget, TTI, |
472 | Depth: Depth + 1)) |
473 | return false; |
474 | // Okay, it's safe to do this! Remember this instruction. |
475 | AggressiveInsts.insert(Ptr: I); |
476 | return true; |
477 | } |
478 | |
479 | /// Extract ConstantInt from value, looking through IntToPtr |
480 | /// and PointerNullValue. Return NULL if value is not a constant int. |
481 | static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) { |
482 | // Normal constant int. |
483 | ConstantInt *CI = dyn_cast<ConstantInt>(Val: V); |
484 | if (CI || !isa<Constant>(Val: V) || !V->getType()->isPointerTy() || |
485 | DL.isNonIntegralPointerType(Ty: V->getType())) |
486 | return CI; |
487 | |
488 | // This is some kind of pointer constant. Turn it into a pointer-sized |
489 | // ConstantInt if possible. |
490 | IntegerType *PtrTy = cast<IntegerType>(Val: DL.getIntPtrType(V->getType())); |
491 | |
492 | // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*). |
493 | if (isa<ConstantPointerNull>(Val: V)) |
494 | return ConstantInt::get(Ty: PtrTy, V: 0); |
495 | |
496 | // IntToPtr const int. |
497 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: V)) |
498 | if (CE->getOpcode() == Instruction::IntToPtr) |
499 | if (ConstantInt *CI = dyn_cast<ConstantInt>(Val: CE->getOperand(i_nocapture: 0))) { |
500 | // The constant is very likely to have the right type already. |
501 | if (CI->getType() == PtrTy) |
502 | return CI; |
503 | else |
504 | return cast<ConstantInt>( |
505 | Val: ConstantFoldIntegerCast(C: CI, DestTy: PtrTy, /*isSigned=*/IsSigned: false, DL)); |
506 | } |
507 | return nullptr; |
508 | } |
509 | |
510 | namespace { |
511 | |
512 | /// Given a chain of or (||) or and (&&) comparison of a value against a |
513 | /// constant, this will try to recover the information required for a switch |
514 | /// structure. |
515 | /// It will depth-first traverse the chain of comparison, seeking for patterns |
516 | /// like %a == 12 or %a < 4 and combine them to produce a set of integer |
517 | /// representing the different cases for the switch. |
518 | /// Note that if the chain is composed of '||' it will build the set of elements |
519 | /// that matches the comparisons (i.e. any of this value validate the chain) |
520 | /// while for a chain of '&&' it will build the set elements that make the test |
521 | /// fail. |
522 | struct ConstantComparesGatherer { |
523 | const DataLayout &DL; |
524 | |
525 | /// Value found for the switch comparison |
526 | Value *CompValue = nullptr; |
527 | |
528 | /// Extra clause to be checked before the switch |
529 | Value * = nullptr; |
530 | |
531 | /// Set of integers to match in switch |
532 | SmallVector<ConstantInt *, 8> Vals; |
533 | |
534 | /// Number of comparisons matched in the and/or chain |
535 | unsigned UsedICmps = 0; |
536 | |
537 | /// Construct and compute the result for the comparison instruction Cond |
538 | ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) { |
539 | gather(V: Cond); |
540 | } |
541 | |
542 | ConstantComparesGatherer(const ConstantComparesGatherer &) = delete; |
543 | ConstantComparesGatherer & |
544 | operator=(const ConstantComparesGatherer &) = delete; |
545 | |
546 | private: |
547 | /// Try to set the current value used for the comparison, it succeeds only if |
548 | /// it wasn't set before or if the new value is the same as the old one |
549 | bool setValueOnce(Value *NewVal) { |
550 | if (CompValue && CompValue != NewVal) |
551 | return false; |
552 | CompValue = NewVal; |
553 | return (CompValue != nullptr); |
554 | } |
555 | |
556 | /// Try to match Instruction "I" as a comparison against a constant and |
557 | /// populates the array Vals with the set of values that match (or do not |
558 | /// match depending on isEQ). |
559 | /// Return false on failure. On success, the Value the comparison matched |
560 | /// against is placed in CompValue. |
561 | /// If CompValue is already set, the function is expected to fail if a match |
562 | /// is found but the value compared to is different. |
563 | bool matchInstruction(Instruction *I, bool isEQ) { |
564 | // If this is an icmp against a constant, handle this as one of the cases. |
565 | ICmpInst *ICI; |
566 | ConstantInt *C; |
567 | if (!((ICI = dyn_cast<ICmpInst>(Val: I)) && |
568 | (C = GetConstantInt(V: I->getOperand(i: 1), DL)))) { |
569 | return false; |
570 | } |
571 | |
572 | Value *RHSVal; |
573 | const APInt *RHSC; |
574 | |
575 | // Pattern match a special case |
576 | // (x & ~2^z) == y --> x == y || x == y|2^z |
577 | // This undoes a transformation done by instcombine to fuse 2 compares. |
578 | if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) { |
579 | // It's a little bit hard to see why the following transformations are |
580 | // correct. Here is a CVC3 program to verify them for 64-bit values: |
581 | |
582 | /* |
583 | ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63); |
584 | x : BITVECTOR(64); |
585 | y : BITVECTOR(64); |
586 | z : BITVECTOR(64); |
587 | mask : BITVECTOR(64) = BVSHL(ONE, z); |
588 | QUERY( (y & ~mask = y) => |
589 | ((x & ~mask = y) <=> (x = y OR x = (y | mask))) |
590 | ); |
591 | QUERY( (y | mask = y) => |
592 | ((x | mask = y) <=> (x = y OR x = (y & ~mask))) |
593 | ); |
594 | */ |
595 | |
596 | // Please note that each pattern must be a dual implication (<--> or |
597 | // iff). One directional implication can create spurious matches. If the |
598 | // implication is only one-way, an unsatisfiable condition on the left |
599 | // side can imply a satisfiable condition on the right side. Dual |
600 | // implication ensures that satisfiable conditions are transformed to |
601 | // other satisfiable conditions and unsatisfiable conditions are |
602 | // transformed to other unsatisfiable conditions. |
603 | |
604 | // Here is a concrete example of a unsatisfiable condition on the left |
605 | // implying a satisfiable condition on the right: |
606 | // |
607 | // mask = (1 << z) |
608 | // (x & ~mask) == y --> (x == y || x == (y | mask)) |
609 | // |
610 | // Substituting y = 3, z = 0 yields: |
611 | // (x & -2) == 3 --> (x == 3 || x == 2) |
612 | |
613 | // Pattern match a special case: |
614 | /* |
615 | QUERY( (y & ~mask = y) => |
616 | ((x & ~mask = y) <=> (x = y OR x = (y | mask))) |
617 | ); |
618 | */ |
619 | if (match(V: ICI->getOperand(i_nocapture: 0), |
620 | P: m_And(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
621 | APInt Mask = ~*RHSC; |
622 | if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) { |
623 | // If we already have a value for the switch, it has to match! |
624 | if (!setValueOnce(RHSVal)) |
625 | return false; |
626 | |
627 | Vals.push_back(Elt: C); |
628 | Vals.push_back( |
629 | Elt: ConstantInt::get(Context&: C->getContext(), |
630 | V: C->getValue() | Mask)); |
631 | UsedICmps++; |
632 | return true; |
633 | } |
634 | } |
635 | |
636 | // Pattern match a special case: |
637 | /* |
638 | QUERY( (y | mask = y) => |
639 | ((x | mask = y) <=> (x = y OR x = (y & ~mask))) |
640 | ); |
641 | */ |
642 | if (match(V: ICI->getOperand(i_nocapture: 0), |
643 | P: m_Or(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
644 | APInt Mask = *RHSC; |
645 | if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) { |
646 | // If we already have a value for the switch, it has to match! |
647 | if (!setValueOnce(RHSVal)) |
648 | return false; |
649 | |
650 | Vals.push_back(Elt: C); |
651 | Vals.push_back(Elt: ConstantInt::get(Context&: C->getContext(), |
652 | V: C->getValue() & ~Mask)); |
653 | UsedICmps++; |
654 | return true; |
655 | } |
656 | } |
657 | |
658 | // If we already have a value for the switch, it has to match! |
659 | if (!setValueOnce(ICI->getOperand(i_nocapture: 0))) |
660 | return false; |
661 | |
662 | UsedICmps++; |
663 | Vals.push_back(Elt: C); |
664 | return ICI->getOperand(i_nocapture: 0); |
665 | } |
666 | |
667 | // If we have "x ult 3", for example, then we can add 0,1,2 to the set. |
668 | ConstantRange Span = |
669 | ConstantRange::makeExactICmpRegion(Pred: ICI->getPredicate(), Other: C->getValue()); |
670 | |
671 | // Shift the range if the compare is fed by an add. This is the range |
672 | // compare idiom as emitted by instcombine. |
673 | Value *CandidateVal = I->getOperand(i: 0); |
674 | if (match(V: I->getOperand(i: 0), P: m_Add(L: m_Value(V&: RHSVal), R: m_APInt(Res&: RHSC)))) { |
675 | Span = Span.subtract(CI: *RHSC); |
676 | CandidateVal = RHSVal; |
677 | } |
678 | |
679 | // If this is an and/!= check, then we are looking to build the set of |
680 | // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into |
681 | // x != 0 && x != 1. |
682 | if (!isEQ) |
683 | Span = Span.inverse(); |
684 | |
685 | // If there are a ton of values, we don't want to make a ginormous switch. |
686 | if (Span.isSizeLargerThan(MaxSize: 8) || Span.isEmptySet()) { |
687 | return false; |
688 | } |
689 | |
690 | // If we already have a value for the switch, it has to match! |
691 | if (!setValueOnce(CandidateVal)) |
692 | return false; |
693 | |
694 | // Add all values from the range to the set |
695 | for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp) |
696 | Vals.push_back(Elt: ConstantInt::get(Context&: I->getContext(), V: Tmp)); |
697 | |
698 | UsedICmps++; |
699 | return true; |
700 | } |
701 | |
/// Given a potentially 'or'd or 'and'd together collection of icmp
/// eq/ne/lt/gt instructions that compare a value against a constant, extract
/// the value being compared, and stick the list constants into the Vals
/// vector.
/// One "Extra" case is allowed to differ from the other.
void gather(Value *V) {
  // An "or" chain collects values for which the condition holds (equality
  // form); otherwise we treat V as an "and" chain (see matchInstruction).
  bool isEQ = match(V, P: m_LogicalOr(L: m_Value(), R: m_Value()));

  // Keep a stack (SmallVector for efficiency) for depth-first traversal
  SmallVector<Value *, 8> DFT;
  SmallPtrSet<Value *, 8> Visited;

  // Initialize
  Visited.insert(Ptr: V);
  DFT.push_back(Elt: V);

  while (!DFT.empty()) {
    V = DFT.pop_back_val();

    if (Instruction *I = dyn_cast<Instruction>(Val: V)) {
      // If it is a || (or && depending on isEQ), process the operands.
      Value *Op0, *Op1;
      if (isEQ ? match(V: I, P: m_LogicalOr(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))
               : match(V: I, P: m_LogicalAnd(L: m_Value(V&: Op0), R: m_Value(V&: Op1)))) {
        // Push Op0 last so it is popped (visited) first, preserving source
        // order; Visited dedupes values reachable via multiple paths.
        if (Visited.insert(Ptr: Op1).second)
          DFT.push_back(Elt: Op1);
        if (Visited.insert(Ptr: Op0).second)
          DFT.push_back(Elt: Op0);

        continue;
      }

      // Try to match the current instruction
      if (matchInstruction(I, isEQ))
        // Match succeed, continue the loop
        continue;
    }

    // One element of the sequence of || (or &&) could not be match as a
    // comparison against the same value as the others.
    // We allow only one "Extra" case to be checked before the switch
    if (!Extra) {
      Extra = V;
      continue;
    }
    // Failed to parse a proper sequence, abort now
    CompValue = nullptr;
    break;
  }
}
752 | }; |
753 | |
754 | } // end anonymous namespace |
755 | |
756 | static void EraseTerminatorAndDCECond(Instruction *TI, |
757 | MemorySSAUpdater *MSSAU = nullptr) { |
758 | Instruction *Cond = nullptr; |
759 | if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) { |
760 | Cond = dyn_cast<Instruction>(Val: SI->getCondition()); |
761 | } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) { |
762 | if (BI->isConditional()) |
763 | Cond = dyn_cast<Instruction>(Val: BI->getCondition()); |
764 | } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(Val: TI)) { |
765 | Cond = dyn_cast<Instruction>(Val: IBI->getAddress()); |
766 | } |
767 | |
768 | TI->eraseFromParent(); |
769 | if (Cond) |
770 | RecursivelyDeleteTriviallyDeadInstructions(V: Cond, TLI: nullptr, MSSAU); |
771 | } |
772 | |
/// If the specified terminator is a value-equality comparison against constant
/// integers (a switch, or a conditional branch on an integer equality icmp),
/// return the value being compared; otherwise return null.
Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
  Value *CV = nullptr;
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    // Do not permit merging of large switch instructions into their
    // predecessors unless there is only one predecessor.
    if (!SI->getParent()->hasNPredecessorsOrMore(N: 128 / SI->getNumSuccessors()))
      CV = SI->getCondition();
  } else if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI))
    // Only a one-use eq/ne icmp against a constant int qualifies.
    if (BI->isConditional() && BI->getCondition()->hasOneUse())
      if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val: BI->getCondition())) {
        if (ICI->isEquality() && GetConstantInt(V: ICI->getOperand(i_nocapture: 1), DL))
          CV = ICI->getOperand(i_nocapture: 0);
      }

  // Unwrap any lossless ptrtoint cast.
  if (CV) {
    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(Val: CV)) {
      Value *Ptr = PTII->getPointerOperand();
      // Lossless only when the integer type is exactly pointer-sized.
      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
        CV = Ptr;
    }
  }
  return CV;
}
799 | |
/// Given a value comparison instruction,
/// decode all of the 'cases' that it represents and return the 'default' block.
BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
  if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: TI)) {
    Cases.reserve(n: SI->getNumCases());
    for (auto Case : SI->cases())
      Cases.push_back(x: ValueEqualityComparisonCase(Case.getCaseValue(),
                                                 Case.getCaseSuccessor()));
    return SI->getDefaultDest();
  }

  // Otherwise this is a conditional branch on an eq/ne icmp (see
  // isValueEqualityComparison). For ICMP_NE the "matched" edge is successor 1
  // (false); for ICMP_EQ it is successor 0 (true) — hence the bool-as-index.
  BranchInst *BI = cast<BranchInst>(Val: TI);
  ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition());
  BasicBlock *Succ = BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_NE);
  Cases.push_back(x: ValueEqualityComparisonCase(
      GetConstantInt(V: ICI->getOperand(i_nocapture: 1), DL), Succ));
  // The opposite successor plays the role of the switch "default".
  return BI->getSuccessor(i: ICI->getPredicate() == ICmpInst::ICMP_EQ);
}
819 | |
/// Given a vector of bb/value pairs, remove any entries
/// in the list that match the specified block.
static void
EliminateBlockCases(BasicBlock *BB,
                    std::vector<ValueEqualityComparisonCase> &Cases) {
  // Relies on ValueEqualityComparisonCase comparing equal to a BasicBlock*
  // when its destination is that block (operator declared with the struct,
  // outside this excerpt).
  llvm::erase(C&: Cases, V: BB);
}
827 | |
828 | /// Return true if there are any keys in C1 that exist in C2 as well. |
829 | static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1, |
830 | std::vector<ValueEqualityComparisonCase> &C2) { |
831 | std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2; |
832 | |
833 | // Make V1 be smaller than V2. |
834 | if (V1->size() > V2->size()) |
835 | std::swap(a&: V1, b&: V2); |
836 | |
837 | if (V1->empty()) |
838 | return false; |
839 | if (V1->size() == 1) { |
840 | // Just scan V2. |
841 | ConstantInt *TheVal = (*V1)[0].Value; |
842 | for (const ValueEqualityComparisonCase &VECC : *V2) |
843 | if (TheVal == VECC.Value) |
844 | return true; |
845 | } |
846 | |
847 | // Otherwise, just sort both lists and compare element by element. |
848 | array_pod_sort(Start: V1->begin(), End: V1->end()); |
849 | array_pod_sort(Start: V2->begin(), End: V2->end()); |
850 | unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size(); |
851 | while (i1 != e1 && i2 != e2) { |
852 | if ((*V1)[i1].Value == (*V2)[i2].Value) |
853 | return true; |
854 | if ((*V1)[i1].Value < (*V2)[i2].Value) |
855 | ++i1; |
856 | else |
857 | ++i2; |
858 | } |
859 | return false; |
860 | } |
861 | |
862 | // Set branch weights on SwitchInst. This sets the metadata if there is at |
863 | // least one non-zero weight. |
864 | static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) { |
865 | // Check that there is at least one non-zero weight. Otherwise, pass |
866 | // nullptr to setMetadata which will erase the existing metadata. |
867 | MDNode *N = nullptr; |
868 | if (llvm::any_of(Range&: Weights, P: [](uint32_t W) { return W != 0; })) |
869 | N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights); |
870 | SI->setMetadata(KindID: LLVMContext::MD_prof, Node: N); |
871 | } |
872 | |
873 | // Similar to the above, but for branch and select instructions that take |
874 | // exactly 2 weights. |
875 | static void setBranchWeights(Instruction *I, uint32_t TrueWeight, |
876 | uint32_t FalseWeight) { |
877 | assert(isa<BranchInst>(I) || isa<SelectInst>(I)); |
878 | // Check that there is at least one non-zero weight. Otherwise, pass |
879 | // nullptr to setMetadata which will erase the existing metadata. |
880 | MDNode *N = nullptr; |
881 | if (TrueWeight || FalseWeight) |
882 | N = MDBuilder(I->getParent()->getContext()) |
883 | .createBranchWeights(TrueWeight, FalseWeight); |
884 | I->setMetadata(KindID: LLVMContext::MD_prof, Node: N); |
885 | } |
886 | |
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(TI: Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!" );
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      GetValueEqualityComparisonCases(TI: Pred->getTerminator(), Cases&: PredCases);
  EliminateBlockCases(BB: PredDef, Cases&: PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, Cases&: ThisCases);
  EliminateBlockCases(BB: ThisDef, Cases&: ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases. If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!ValuesOverlap(C1&: PredCases, C2&: ThisCases))
      return false;

    if (isa<BranchInst>(Val: TI)) {
      // Okay, one of the successors of this condbr is dead. Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!" );
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(Dest: ThisDef);
      (void)NI;

      // Remove PHI node entries for the dead edge.
      ThisCases[0].Dest->removePredecessor(Pred: PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n" );

      EraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            Updates: {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(Val: TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(Ptr: PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Per-successor live-case count so we can tell DTU exactly which edges
    // vanish entirely (a successor may be targeted by several cases).
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Iterate the cases backwards so removeCase() does not shift the
    // positions of cases we have not yet visited.
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(Ptr: i->getCaseValue())) {
        Successor->removePredecessor(Pred: PredDef);
        SI.removeCase(I: i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // Only edges whose live-case count dropped to zero were truly deleted.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back(x: {DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n" );
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value. Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
    if (PredCases[i].Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = PredCases[i].Value;
    }
  assert(TIV && "No edge from pred to succ?" );

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB. Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
    if (ThisCases[i].Value == TIV) {
      TheRealDest = ThisCases[i].Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges. CheckEdge keeps exactly one edge
  // to TheRealDest alive: after the first match it is cleared, so duplicate
  // edges to TheRealDest also get their PHI entries removed.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(BB: TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Ptr: Succ);
      Succ->removePredecessor(Pred: TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(Dest: TheRealDest);
  (void)NI;

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n" );

  EraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(Elt: {DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1040 | |
namespace {

/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
struct ConstantIntOrdering {
  // Strict weak ordering by unsigned comparison of the underlying APInt.
  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
    return LHS->getValue().ult(RHS: RHS->getValue());
  }
};

} // end anonymous namespace
1053 | |
1054 | static int ConstantIntSortPredicate(ConstantInt *const *P1, |
1055 | ConstantInt *const *P2) { |
1056 | const ConstantInt *LHS = *P1; |
1057 | const ConstantInt *RHS = *P2; |
1058 | if (LHS == RHS) |
1059 | return 0; |
1060 | return LHS->getValue().ult(RHS: RHS->getValue()) ? 1 : -1; |
1061 | } |
1062 | |
/// Get Weights of a given terminator, the default weight is at the front
/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
/// metadata.
static void GetBranchWeights(Instruction *TI,
                             SmallVectorImpl<uint64_t> &Weights) {
  MDNode *MD = TI->getMetadata(KindID: LLVMContext::MD_prof);
  assert(MD);
  // Weight operands of the !prof node start at index 1 (operand 0 is the
  // metadata tag).
  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
    ConstantInt *CI = mdconst::extract<ConstantInt>(MD: MD->getOperand(I: i));
    Weights.push_back(Elt: CI->getValue().getZExtValue());
  }

  // If TI is a conditional eq, the default case is the false case,
  // and the corresponding branch-weight data is at index 2. We swap the
  // default weight to be the first entry.
  if (BranchInst *BI = dyn_cast<BranchInst>(Val: TI)) {
    assert(Weights.size() == 2);
    ICmpInst *ICI = cast<ICmpInst>(Val: BI->getCondition());
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      std::swap(a&: Weights.front(), b&: Weights.back());
  }
}
1085 | |
1086 | /// Keep halving the weights until all can fit in uint32_t. |
1087 | static void FitWeights(MutableArrayRef<uint64_t> Weights) { |
1088 | uint64_t Max = *llvm::max_element(Range&: Weights); |
1089 | if (Max > UINT_MAX) { |
1090 | unsigned Offset = 32 - llvm::countl_zero(Val: Max); |
1091 | for (uint64_t &I : Weights) |
1092 | I >>= Offset; |
1093 | } |
1094 | } |
1095 | |
/// Clone every non-terminator ("bonus") instruction of \p BB into \p PredBlock
/// just before its terminator, remapping operands through \p VMap, and rewrite
/// the uses that must now refer to the clones. Expects block-closed SSA form
/// with respect to BB's bonus values (asserted below).
static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
  Instruction *PTI = PredBlock->getTerminator();

  // If we have bonus instructions, clone them into the predecessor block.
  // Note that there may be multiple predecessor blocks, so we cannot move
  // bonus instructions to a predecessor block.
  for (Instruction &BonusInst : *BB) {
    if (BonusInst.isTerminator())
      continue;

    Instruction *NewBonusInst = BonusInst.clone();

    if (!isa<DbgInfoIntrinsic>(Val: BonusInst) &&
        PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
      // Unless the instruction has the same !dbg location as the original
      // branch, drop it. When we fold the bonus instructions we want to make
      // sure we reset their debug locations in order to avoid stepping on
      // dead code caused by folding dead branches.
      NewBonusInst->setDebugLoc(DebugLoc());
    }

    RemapInstruction(I: NewBonusInst, VM&: VMap,
                     Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);

    // If we speculated an instruction, we need to drop any metadata that may
    // result in undefined behavior, as the metadata might have been valid
    // only given the branch precondition.
    // Similarly strip attributes on call parameters that may cause UB in
    // location the call is moved to.
    NewBonusInst->dropUBImplyingAttrsAndMetadata();

    NewBonusInst->insertInto(ParentBB: PredBlock, It: PTI->getIterator());
    // Clone attached debug-info records and remap their operands as well.
    auto Range = NewBonusInst->cloneDebugInfoFrom(From: &BonusInst);
    RemapDbgVariableRecordRange(M: NewBonusInst->getModule(), Range, VM&: VMap,
                                Flags: RF_NoModuleLevelChanges |
                                    RF_IgnoreMissingLocals);

    // Debug intrinsics produce no SSA value, so there are no uses to rewrite.
    if (isa<DbgInfoIntrinsic>(Val: BonusInst))
      continue;

    NewBonusInst->takeName(V: &BonusInst);
    BonusInst.setName(NewBonusInst->getName() + ".old" );
    VMap[&BonusInst] = NewBonusInst;

    // Update (liveout) uses of bonus instructions,
    // now that the bonus instruction has been cloned into predecessor.
    // Note that we expect to be in a block-closed SSA form for this to work!
    for (Use &U : make_early_inc_range(Range: BonusInst.uses())) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      auto *PN = dyn_cast<PHINode>(Val: UI);
      if (!PN) {
        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
               "If the user is not a PHI node, then it should be in the same "
               "block as, and come after, the original bonus instruction." );
        continue; // Keep using the original bonus instruction.
      }
      // Is this the block-closed SSA form PHI node?
      if (PN->getIncomingBlock(U) == BB)
        continue; // Great, keep using the original bonus instruction.
      // The only other alternative is an "use" when coming from
      // the predecessor block - here we should refer to the cloned bonus instr.
      assert(PN->getIncomingBlock(U) == PredBlock &&
             "Not in block-closed SSA form?" );
      U.set(NewBonusInst);
    }
  }
}
1164 | |
/// Fold the value-equality terminator \p TI (in block BB) into its
/// predecessor's value-equality terminator \p PTI; both compare the value
/// \p CV. Builds one merged switch in the predecessor, merging branch-weight
/// metadata when present and keeping PHI nodes and the DomTree consistent.
/// Always performs the fold and returns true.
bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, Cases&: BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = GetValueEqualityComparisonCases(TI: PTI, Cases&: PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(I: *PTI);
  bool SuccHasWeights = hasBranchWeightMD(I: *TI);

  if (PredHasWeights) {
    GetBranchWeights(TI: PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(NumElts: 1 + PredCases.size(), Elt: 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    GetBranchWeights(TI, Weights&: SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    SuccWeights.assign(NumElts: 1 + BBCases.size(), Elt: 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(x: PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Swap-with-back + pop erases in O(1); index i is re-examined.
        std::swap(a&: PredCases[i], b&: PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back(Elt: {DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(x: BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(x: BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Elt: Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(x: PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          // Remember each removed case's weight before erasing it.
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(a&: Weights[i + 1], b&: Weights.back());
          Weights.pop_back();
        }

        std::swap(a&: PredCases[i], b&: PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor. Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(x: BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(Elt: WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(x: BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(x: BBCases[i].Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(Elt: WeightsForHandled[I]);
      PredCases.push_back(x: ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get. Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(BB: Pred), succ_end(BB: Pred)};
    Updates.reserve(N: Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // One PHI entry per case edge now reaching this successor from Pred.
    for (auto I : seq(Size: NewSuccessor.second)) {
      (void)I;
      AddPredecessorToBlock(Succ: NewSuccessor.first, NewPred: Pred, ExistPred: BB);
    }
    if (DTU && !SuccsOfPred.contains(Ptr: NewSuccessor.first))
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(V: CV, DestTy: DL.getIntPtrType(CV->getType()), Name: "magicptr" );
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(V: CV, Dest: PredDefault, NumCases: PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(OnVal: V.Value, Dest: V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(SI: NewSI, Weights: MDWeights);
  }

  EraseTerminatorAndDCECond(TI: PTI);

  // Okay, last check. If BB is still a successor of PSI, then we must
  // have an infinite loop case. If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(idx: i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(Context&: BB->getContext(), Name: "infloop" , Parent: BB->getParent());
        BranchInst::Create(IfTrue: InfLoopBlock, InsertAtEnd: InfLoopBlock);
        if (DTU)
          Updates.push_back(
              Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(idx: i, NewSucc: InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, InfLoopBlock});

    Updates.push_back(Elt: {DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1381 | |
/// The specified terminator is a value equality comparison instruction
/// (either a switch or a branch on "X == c").
/// See if any of the predecessors of the terminator block are value comparisons
/// on the same value. If so, and if safe to do so, fold them together.
bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
                                                         IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  Value *CV = isValueEqualityComparison(TI); // CondVal
  assert(CV && "Not a comparison?" );

  bool Changed = false;

  // SetVector dedupes predecessors reaching BB through multiple edges.
  SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
  while (!Preds.empty()) {
    BasicBlock *Pred = Preds.pop_back_val();
    Instruction *PTI = Pred->getTerminator();

    // Don't try to fold into itself.
    if (Pred == BB)
      continue;

    // See if the predecessor is a comparison with the same value.
    Value *PCV = isValueEqualityComparison(TI: PTI); // PredCondVal
    if (PCV != CV)
      continue;

    // Split problematic successors first so the merge becomes safe.
    SmallSetVector<BasicBlock *, 4> FailBlocks;
    if (!SafeToMergeTerminators(SI1: TI, SI2: PTI, FailBlocks: &FailBlocks)) {
      for (auto *Succ : FailBlocks) {
        // NOTE(review): this returns false even when earlier iterations (or
        // prior splits) already modified the IR — confirm callers tolerate
        // a "no change" result after partial changes.
        if (!SplitBlockPredecessors(BB: Succ, Preds: TI->getParent(), Suffix: ".fold.split" , DTU))
          return false;
      }
    }

    PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
    Changed = true;
  }
  return Changed;
}
1421 | |
1422 | // If we would need to insert a select that uses the value of this invoke |
1423 | // (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would |
1424 | // need to do this), we can't hoist the invoke, as there is nowhere to put the |
1425 | // select in this case. |
1426 | static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, |
1427 | Instruction *I1, Instruction *I2) { |
1428 | for (BasicBlock *Succ : successors(BB: BB1)) { |
1429 | for (const PHINode &PN : Succ->phis()) { |
1430 | Value *BB1V = PN.getIncomingValueForBlock(BB: BB1); |
1431 | Value *BB2V = PN.getIncomingValueForBlock(BB: BB2); |
1432 | if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) { |
1433 | return false; |
1434 | } |
1435 | } |
1436 | } |
1437 | return true; |
1438 | } |
1439 | |
// Get interesting characteristics of instructions that
// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
// instructions can be reordered across. Each bit corresponds to a check in
// skippedInstrFlags()/isSafeToHoistInstr() below.
enum SkipFlags {
  SkipReadMem = 1,            // Skipped over something that may read memory.
  SkipSideEffect = 2,         // Skipped over side effects (or an alloca).
  SkipImplicitControlFlow = 4 // Skipped over possible implicit control flow.
};
1448 | |
1449 | static unsigned skippedInstrFlags(Instruction *I) { |
1450 | unsigned Flags = 0; |
1451 | if (I->mayReadFromMemory()) |
1452 | Flags |= SkipReadMem; |
1453 | // We can't arbitrarily move around allocas, e.g. moving allocas (especially |
1454 | // inalloca) across stacksave/stackrestore boundaries. |
1455 | if (I->mayHaveSideEffects() || isa<AllocaInst>(Val: I)) |
1456 | Flags |= SkipSideEffect; |
1457 | if (!isGuaranteedToTransferExecutionToSuccessor(I)) |
1458 | Flags |= SkipImplicitControlFlow; |
1459 | return Flags; |
1460 | } |
1461 | |
1462 | // Returns true if it is safe to reorder an instruction across preceding |
1463 | // instructions in a basic block. |
1464 | static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) { |
1465 | // Don't reorder a store over a load. |
1466 | if ((Flags & SkipReadMem) && I->mayWriteToMemory()) |
1467 | return false; |
1468 | |
1469 | // If we have seen an instruction with side effects, it's unsafe to reorder an |
1470 | // instruction which reads memory or itself has side effects. |
1471 | if ((Flags & SkipSideEffect) && |
1472 | (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(Val: I))) |
1473 | return false; |
1474 | |
1475 | // Reordering across an instruction which does not necessarily transfer |
1476 | // control to the next instruction is speculation. |
1477 | if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I)) |
1478 | return false; |
1479 | |
1480 | // Hoisting of llvm.deoptimize is only legal together with the next return |
1481 | // instruction, which this pass is not always able to do. |
1482 | if (auto *CB = dyn_cast<CallBase>(Val: I)) |
1483 | if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize) |
1484 | return false; |
1485 | |
1486 | // It's also unsafe/illegal to hoist an instruction above its instruction |
1487 | // operands |
1488 | BasicBlock *BB = I->getParent(); |
1489 | for (Value *Op : I->operands()) { |
1490 | if (auto *J = dyn_cast<Instruction>(Val: Op)) |
1491 | if (J->getParent() == BB) |
1492 | return false; |
1493 | } |
1494 | |
1495 | return true; |
1496 | } |
1497 | |
1498 | static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false); |
1499 | |
1500 | /// Helper function for hoistCommonCodeFromSuccessors. Return true if identical |
1501 | /// instructions \p I1 and \p I2 can and should be hoisted. |
1502 | static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, |
1503 | const TargetTransformInfo &TTI) { |
1504 | // If we're going to hoist a call, make sure that the two instructions |
1505 | // we're commoning/hoisting are both marked with musttail, or neither of |
1506 | // them is marked as such. Otherwise, we might end up in a situation where |
1507 | // we hoist from a block where the terminator is a `ret` to a block where |
1508 | // the terminator is a `br`, and `musttail` calls expect to be followed by |
1509 | // a return. |
1510 | auto *C1 = dyn_cast<CallInst>(Val: I1); |
1511 | auto *C2 = dyn_cast<CallInst>(Val: I2); |
1512 | if (C1 && C2) |
1513 | if (C1->isMustTailCall() != C2->isMustTailCall()) |
1514 | return false; |
1515 | |
1516 | if (!TTI.isProfitableToHoist(I: I1) || !TTI.isProfitableToHoist(I: I2)) |
1517 | return false; |
1518 | |
1519 | // If any of the two call sites has nomerge or convergent attribute, stop |
1520 | // hoisting. |
1521 | if (const auto *CB1 = dyn_cast<CallBase>(Val: I1)) |
1522 | if (CB1->cannotMerge() || CB1->isConvergent()) |
1523 | return false; |
1524 | if (const auto *CB2 = dyn_cast<CallBase>(Val: I2)) |
1525 | if (CB2->cannotMerge() || CB2->isConvergent()) |
1526 | return false; |
1527 | |
1528 | return true; |
1529 | } |
1530 | |
/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
/// hoistCommonCodeFromSuccessors. e.g. The input:
/// I1 DVRs: { x, z },
/// OtherInsts: { I2 DVRs: { x, y, z } }
/// would result in hoisting only DbgVariableRecord x.
static void hoistLockstepIdenticalDbgVariableRecords(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherInsts) {
  // Nothing to do when the leading instruction carries no debug records.
  if (!I1->hasDbgRecords())
    return;
  using CurrentAndEndIt =
      std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
  // Vector of {Current, End} iterators.
  SmallVector<CurrentAndEndIt> Itrs;
  Itrs.reserve(N: OtherInsts.size() + 1);
  // Helper lambdas for lock-step checks:
  // Return true if this Current == End.
  auto atEnd = [](const CurrentAndEndIt &Pair) {
    return Pair.first == Pair.second;
  };
  // Return true if all Current are identical (each compared against the
  // first iterator's record).
  auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
    return all_of(Range: make_first_range(c: ArrayRef(Itrs).drop_front()),
                  P: [&](DbgRecord::self_iterator I) {
                    return Itrs[0].first->isIdenticalToWhenDefined(R: *I);
                  });
  };

  // Collect the iterators.
  Itrs.push_back(
      Elt: {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
  for (Instruction *Other : OtherInsts) {
    // If any instruction has no debug records at all, lock-step matching is
    // impossible; bail out without moving anything.
    if (!Other->hasDbgRecords())
      return;
    Itrs.push_back(
        Elt: {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
  }

  // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
  // the lock-step DbgRecords are identical, hoist all of them to TI.
  // This replicates the dbg.* intrinsic behaviour in
  // hoistCommonCodeFromSuccessors.
  while (none_of(Range&: Itrs, P: atEnd)) {
    bool HoistDVRs = allIdentical(Itrs);
    for (CurrentAndEndIt &Pair : Itrs) {
      // Increment Current iterator now as we may be about to move the
      // DbgRecord.
      DbgRecord &DR = *Pair.first++;
      if (HoistDVRs) {
        DR.removeFromParent();
        TI->getParent()->insertDbgRecordBefore(DR: &DR, Here: TI->getIterator());
      }
    }
  }
}
1587 | |
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
                                                   bool EqTermsOnly) {
  // This does very trivial matching, with limited scanning, to find identical
  // instructions in the two blocks. In particular, we don't want to get into
  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
  // such, we currently just scan for obviously identical instructions in an
  // identical order, possibly separated by the same number of non-identical
  // instructions.
  unsigned int SuccSize = succ_size(BB);
  if (SuccSize < 2)
    return false;

  // If either of the blocks has its address taken, then we can't do this fold,
  // because the code we'd hoist would no longer run when we jump into the block
  // by its address.
  for (auto *Succ : successors(BB))
    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
      return false;

  auto *TI = BB->getTerminator();

  // The second of pair is a SkipFlags bitmask.
  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
  SmallVector<SuccIterPair, 8> SuccIterPairs;
  for (auto *Succ : successors(BB)) {
    BasicBlock::iterator SuccItr = Succ->begin();
    // A PHI at the top of a successor would require select insertion, which
    // this simple lock-step scan does not handle.
    if (isa<PHINode>(Val: *SuccItr))
      return false;
    SuccIterPairs.push_back(Elt: SuccIterPair(SuccItr, 0));
  }

  // Check if only hoisting terminators is allowed. This does not add new
  // instructions to the hoist location.
  if (EqTermsOnly) {
    // Skip any debug intrinsics, as they are free to hoist.
    for (auto &SuccIter : make_first_range(c&: SuccIterPairs)) {
      auto *INonDbg = &*skipDebugIntrinsics(It: SuccIter);
      if (!INonDbg->isTerminator())
        return false;
    }
    // Now we know that we only need to hoist debug intrinsics and the
    // terminator. Let the loop below handle those 2 cases.
  }

  // Count how many instructions were not hoisted so far. There's a limit on how
  // many instructions we skip, serving as a compilation time control as well as
  // preventing excessive increase of life ranges.
  unsigned NumSkipped = 0;
  // If we find an unreachable instruction at the beginning of a basic block, we
  // can still hoist instructions from the rest of the basic blocks.
  if (SuccIterPairs.size() > 2) {
    erase_if(C&: SuccIterPairs,
             P: [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
    if (SuccIterPairs.size() < 2)
      return false;
  }

  bool Changed = false;

  // Lock-step scan: advance one iterator per (remaining) successor and try to
  // hoist the current instruction of each if all copies are identical.
  for (;;) {
    auto *SuccIterPairBegin = SuccIterPairs.begin();
    auto &BB1ItrPair = *SuccIterPairBegin++;
    auto OtherSuccIterPairRange =
        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
    auto OtherSuccIterRange = make_first_range(c&: OtherSuccIterPairRange);

    Instruction *I1 = &*BB1ItrPair.first;

    // Skip debug info if it is not identical.
    bool AllDbgInstsAreIdentical = all_of(Range&: OtherSuccIterRange, P: [I1](auto &Iter) {
      Instruction *I2 = &*Iter;
      return I1->isIdenticalToWhenDefined(I: I2);
    });
    if (!AllDbgInstsAreIdentical) {
      while (isa<DbgInfoIntrinsic>(Val: I1))
        I1 = &*++BB1ItrPair.first;
      for (auto &SuccIter : OtherSuccIterRange) {
        Instruction *I2 = &*SuccIter;
        while (isa<DbgInfoIntrinsic>(Val: I2))
          I2 = &*++SuccIter;
      }
    }

    bool AllInstsAreIdentical = true;
    bool HasTerminator = I1->isTerminator();
    for (auto &SuccIter : OtherSuccIterRange) {
      Instruction *I2 = &*SuccIter;
      HasTerminator |= I2->isTerminator();
      // MMRA metadata is compared conservatively: instructions that differ
      // only in MMRAs are still treated as non-identical.
      if (AllInstsAreIdentical && (!I1->isIdenticalToWhenDefined(I: I2) ||
                                   MMRAMetadata(*I1) != MMRAMetadata(*I2)))
        AllInstsAreIdentical = false;
    }

    SmallVector<Instruction *, 8> OtherInsts;
    for (auto &SuccIter : OtherSuccIterRange)
      OtherInsts.push_back(Elt: &*SuccIter);

    // If we are hoisting the terminator instruction, don't move one (making a
    // broken BB), instead clone it, and remove BI.
    if (HasTerminator) {
      // Even if BB, which contains only one unreachable instruction, is ignored
      // at the beginning of the loop, we can hoist the terminator instruction.
      // If any instructions remain in the block, we cannot hoist terminators.
      if (NumSkipped || !AllInstsAreIdentical) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }

      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, OtherSuccTIs&: OtherInsts) ||
             Changed;
    }

    if (AllInstsAreIdentical) {
      unsigned SkipFlagsBB1 = BB1ItrPair.second;
      AllInstsAreIdentical =
          isSafeToHoistInstr(I: I1, Flags: SkipFlagsBB1) &&
          all_of(Range&: OtherSuccIterPairRange, P: [=](const auto &Pair) {
            Instruction *I2 = &*Pair.first;
            unsigned SkipFlagsBB2 = Pair.second;
            // Even if the instructions are identical, it may not
            // be safe to hoist them if we have skipped over
            // instructions with side effects or their operands
            // weren't hoisted.
            return isSafeToHoistInstr(I: I2, Flags: SkipFlagsBB2) &&
                   shouldHoistCommonInstructions(I1, I2, TTI);
          });
    }

    if (AllInstsAreIdentical) {
      BB1ItrPair.first++;
      if (isa<DbgInfoIntrinsic>(Val: I1)) {
        // The debug location is an integral part of a debug info intrinsic
        // and can't be separated from it or replaced. Instead of attempting
        // to merge locations, simply hoist both copies of the intrinsic.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(MovePos: TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          auto *I2 = &*SuccIter++;
          assert(isa<DbgInfoIntrinsic>(I2));
          I2->moveBefore(MovePos: TI);
        }
      } else {
        // For a normal instruction, we just move one to right before the
        // branch, then replace all uses of the other with the first. Finally,
        // we remove the now redundant second instruction.
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
        // and leave any that were not hoisted behind (by calling moveBefore
        // rather than moveBeforePreserving).
        I1->moveBefore(MovePos: TI);
        for (auto &SuccIter : OtherSuccIterRange) {
          Instruction *I2 = &*SuccIter++;
          assert(I2 != I1);
          if (!I2->use_empty())
            I2->replaceAllUsesWith(V: I1);
          I1->andIRFlags(V: I2);
          combineMetadataForCSE(K: I1, J: I2, DoesKMove: true);
          // I1 and I2 are being combined into a single instruction. Its debug
          // location is the merged locations of the original instructions.
          I1->applyMergedLocation(LocA: I1->getDebugLoc(), LocB: I2->getDebugLoc());
          I2->eraseFromParent();
        }
      }
      if (!Changed)
        NumHoistCommonCode += SuccIterPairs.size();
      Changed = true;
      NumHoistCommonInstrs += SuccIterPairs.size();
    } else {
      if (NumSkipped >= HoistCommonSkipLimit) {
        hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
        return Changed;
      }
      // We are about to skip over a pair of non-identical instructions. Record
      // if any have characteristics that would prevent reordering instructions
      // across them.
      for (auto &SuccIterPair : SuccIterPairs) {
        Instruction *I = &*SuccIterPair.first++;
        SuccIterPair.second |= skippedInstrFlags(I);
      }
      ++NumSkipped;
    }
  }
}
1779 | |
/// Hoist the identical terminator \p I1 (and its counterparts in
/// \p OtherSuccTIs) from the successors of \p TI's block up into that block:
/// the terminator is cloned before \p TI, PHI disagreements in the successors
/// are resolved with selects (conditional-branch case only), and \p TI is
/// erased. Returns true if the transformation was performed.
bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
    Instruction *TI, Instruction *I1,
    SmallVectorImpl<Instruction *> &OtherSuccTIs) {

  auto *BI = dyn_cast<BranchInst>(Val: TI);

  bool Changed = false;
  BasicBlock *TIParent = TI->getParent();
  BasicBlock *BB1 = I1->getParent();

  // Use only for an if statement.
  auto *I2 = *OtherSuccTIs.begin();
  auto *BB2 = I2->getParent();
  if (BI) {
    assert(OtherSuccTIs.size() == 1);
    assert(BI->getSuccessor(0) == I1->getParent());
    assert(BI->getSuccessor(1) == I2->getParent());
  }

  // In the case of an if statement, we try to hoist an invoke.
  // FIXME: Can we define a safety predicate for CallBr?
  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
  if (isa<InvokeInst>(Val: I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
    return false;

  // TODO: callbr hoisting currently disabled pending further study.
  if (isa<CallBrInst>(Val: I1))
    return false;

  for (BasicBlock *Succ : successors(BB: BB1)) {
    for (PHINode &PN : Succ->phis()) {
      Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
      for (Instruction *OtherSuccTI : OtherSuccTIs) {
        Value *BB2V = PN.getIncomingValueForBlock(BB: OtherSuccTI->getParent());
        if (BB1V == BB2V)
          continue;

        // In the case of an if statement, check for
        // passingValueIsAlwaysUndefined here because we would rather eliminate
        // undefined control flow than converting it to a select.
        if (!BI || passingValueIsAlwaysUndefined(V: BB1V, I: &PN) ||
            passingValueIsAlwaysUndefined(V: BB2V, I: &PN))
          return false;
      }
    }
  }

  // Hoist DbgVariableRecords attached to the terminator to match dbg.*
  // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
  hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts&: OtherSuccTIs);
  // Clone the terminator and hoist it into the pred, without any debug info.
  Instruction *NT = I1->clone();
  NT->insertInto(ParentBB: TIParent, It: TI->getIterator());
  if (!NT->getType()->isVoidTy()) {
    I1->replaceAllUsesWith(V: NT);
    for (Instruction *OtherSuccTI : OtherSuccTIs)
      OtherSuccTI->replaceAllUsesWith(V: NT);
    NT->takeName(V: I1);
  }
  Changed = true;
  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;

  // Ensure terminator gets a debug location, even an unknown one, in case
  // it involves inlinable calls.
  SmallVector<DILocation *, 4> Locs;
  Locs.push_back(Elt: I1->getDebugLoc());
  for (auto *OtherSuccTI : OtherSuccTIs)
    Locs.push_back(Elt: OtherSuccTI->getDebugLoc());
  NT->setDebugLoc(DILocation::getMergedLocations(Locs));

  // PHIs created below will adopt NT's merged DebugLoc.
  IRBuilder<NoFolder> Builder(NT);

  // In the case of an if statement, hoisting one of the terminators from our
  // successor is a great thing. Unfortunately, the successors of the if/else
  // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
  // must agree for all PHI nodes, so we insert select instruction to compute
  // the final result.
  if (BI) {
    // Cache one select per distinct (BB1V, BB2V) pair so equal disagreements
    // share a single select.
    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
    for (BasicBlock *Succ : successors(BB: BB1)) {
      for (PHINode &PN : Succ->phis()) {
        Value *BB1V = PN.getIncomingValueForBlock(BB: BB1);
        Value *BB2V = PN.getIncomingValueForBlock(BB: BB2);
        if (BB1V == BB2V)
          continue;

        // These values do not agree. Insert a select instruction before NT
        // that determines the right value.
        SelectInst *&SI = InsertedSelects[std::make_pair(x&: BB1V, y&: BB2V)];
        if (!SI) {
          // Propagate fast-math-flags from phi node to its replacement select.
          IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
          if (isa<FPMathOperator>(Val: PN))
            Builder.setFastMathFlags(PN.getFastMathFlags());

          SI = cast<SelectInst>(Val: Builder.CreateSelect(
              C: BI->getCondition(), True: BB1V, False: BB2V,
              Name: BB1V->getName() + "." + BB2V->getName(), MDFrom: BI));
        }

        // Make the PHI node use the select for all incoming values for BB1/BB2
        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
            PN.setIncomingValue(i, V: SI);
      }
    }
  }

  SmallVector<DominatorTree::UpdateType, 4> Updates;

  // Update any PHI nodes in our new successors.
  for (BasicBlock *Succ : successors(BB: BB1)) {
    AddPredecessorToBlock(Succ, NewPred: TIParent, ExistPred: BB1);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, TIParent, Succ});
  }

  if (DTU)
    for (BasicBlock *Succ : successors(I: TI))
      Updates.push_back(Elt: {DominatorTree::Delete, TIParent, Succ});

  EraseTerminatorAndDCECond(TI);
  if (DTU)
    DTU->applyUpdates(Updates);
  return Changed;
}
1908 | |
1909 | // Check lifetime markers. |
1910 | static bool isLifeTimeMarker(const Instruction *I) { |
1911 | if (auto II = dyn_cast<IntrinsicInst>(Val: I)) { |
1912 | switch (II->getIntrinsicID()) { |
1913 | default: |
1914 | break; |
1915 | case Intrinsic::lifetime_start: |
1916 | case Intrinsic::lifetime_end: |
1917 | return true; |
1918 | } |
1919 | } |
1920 | return false; |
1921 | } |
1922 | |
1923 | // TODO: Refine this. This should avoid cases like turning constant memcpy sizes |
1924 | // into variables. |
1925 | static bool replacingOperandWithVariableIsCheap(const Instruction *I, |
1926 | int OpIdx) { |
1927 | return !isa<IntrinsicInst>(Val: I); |
1928 | } |
1929 | |
// All instructions in Insts belong to different blocks that all unconditionally
// branch to a common successor. Analyze each instruction and return true if it
// would be possible to sink them into their successor, creating one common
// instruction instead. For every value that would be required to be provided by
// PHI node (because an operand varies in each input block), add to PHIOperands.
static bool canSinkInstructions(
    ArrayRef<Instruction *> Insts,
    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
  // Prune out obviously bad instructions to move. Each instruction must have
  // exactly zero or one use, and we check later that use is by a single, common
  // PHI instruction in the successor.
  bool HasUse = !Insts.front()->user_empty();
  for (auto *I : Insts) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(Val: I) || I->isEHPad() || isa<AllocaInst>(Val: I) ||
        I->getType()->isTokenTy())
      return false;

    // Do not try to sink an instruction in an infinite loop - it can cause
    // this algorithm to infinite loop.
    if (I->getParent()->getSingleSuccessor() == I->getParent())
      return false;

    // Conservatively return false if I is an inline-asm instruction. Sinking
    // and merging inline-asm instructions can potentially create arguments
    // that cannot satisfy the inline-asm constraints.
    // If the instruction has nomerge or convergent attribute, return false.
    if (const auto *C = dyn_cast<CallBase>(Val: I))
      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
        return false;

    // Each instruction must have zero or one use.
    if (HasUse && !I->hasOneUse())
      return false;
    if (!HasUse && !I->user_empty())
      return false;
  }

  const Instruction *I0 = Insts.front();
  const auto I0MMRA = MMRAMetadata(*I0);
  for (auto *I : Insts) {
    // All candidates must perform the same operation as the first one.
    if (!I->isSameOperationAs(I: I0))
      return false;

    // swifterror pointers can only be used by a load or store; sinking a load
    // or store would require introducing a select for the pointer operand,
    // which isn't allowed for swifterror pointers.
    if (isa<StoreInst>(Val: I) && I->getOperand(i: 1)->isSwiftError())
      return false;
    if (isa<LoadInst>(Val: I) && I->getOperand(i: 0)->isSwiftError())
      return false;

    // Treat MMRAs conservatively. This pass can be quite aggressive and
    // could drop a lot of MMRAs otherwise.
    if (MMRAMetadata(*I) != I0MMRA)
      return false;
  }

  // All instructions in Insts are known to be the same opcode. If they have a
  // use, check that the only user is a PHI or in the same block as the
  // instruction, because if a user is in the same block as an instruction we're
  // contemplating sinking, it must already be determined to be sinkable.
  if (HasUse) {
    auto *PNUse = dyn_cast<PHINode>(Val: *I0->user_begin());
    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(Idx: 0);
    if (!all_of(Range&: Insts, P: [&PNUse,&Succ](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(Val: *I->user_begin());
          return (PNUse &&
                  PNUse->getParent() == Succ &&
                  PNUse->getIncomingValueForBlock(BB: I->getParent()) == I) ||
                 U->getParent() == I->getParent();
        }))
      return false;
  }

  // Because SROA can't handle speculating stores of selects, try not to sink
  // loads, stores or lifetime markers of allocas when we'd have to create a
  // PHI for the address operand. Also, because it is likely that loads or
  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
  // them.
  // This can cause code churn which can have unintended consequences down
  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
  // FIXME: This is a workaround for a deficiency in SROA - see
  // https://llvm.org/bugs/show_bug.cgi?id=30188
  if (isa<StoreInst>(Val: I0) && any_of(Range&: Insts, P: [](const Instruction *I) {
        return isa<AllocaInst>(Val: I->getOperand(i: 1)->stripPointerCasts());
      }))
    return false;
  if (isa<LoadInst>(Val: I0) && any_of(Range&: Insts, P: [](const Instruction *I) {
        return isa<AllocaInst>(Val: I->getOperand(i: 0)->stripPointerCasts());
      }))
    return false;
  if (isLifeTimeMarker(I: I0) && any_of(Range&: Insts, P: [](const Instruction *I) {
        return isa<AllocaInst>(Val: I->getOperand(i: 1)->stripPointerCasts());
      }))
    return false;

  // For calls to be sinkable, they must all be indirect, or have same callee.
  // I.e. if we have two direct calls to different callees, we don't want to
  // turn that into an indirect call. Likewise, if we have an indirect call,
  // and a direct call, we don't actually want to have a single indirect call.
  if (isa<CallBase>(Val: I0)) {
    auto IsIndirectCall = [](const Instruction *I) {
      return cast<CallBase>(Val: I)->isIndirectCall();
    };
    bool HaveIndirectCalls = any_of(Range&: Insts, P: IsIndirectCall);
    bool AllCallsAreIndirect = all_of(Range&: Insts, P: IsIndirectCall);
    if (HaveIndirectCalls) {
      if (!AllCallsAreIndirect)
        return false;
    } else {
      // All callees must be identical.
      Value *Callee = nullptr;
      for (const Instruction *I : Insts) {
        Value *CurrCallee = cast<CallBase>(Val: I)->getCalledOperand();
        if (!Callee)
          Callee = CurrCallee;
        else if (Callee != CurrCallee)
          return false;
      }
    }
  }

  // For each operand position, either all instructions agree (nothing to do)
  // or the varying values are collected into PHIOperands for PHI creation.
  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
    Value *Op = I0->getOperand(i: OI);
    if (Op->getType()->isTokenTy())
      // Don't touch any operand of token type.
      return false;

    auto SameAsI0 = [&I0, OI](const Instruction *I) {
      assert(I->getNumOperands() == I0->getNumOperands());
      return I->getOperand(i: OI) == I0->getOperand(i: OI);
    };
    if (!all_of(Range&: Insts, P: SameAsI0)) {
      if ((isa<Constant>(Val: Op) && !replacingOperandWithVariableIsCheap(I: I0, OpIdx: OI)) ||
          !canReplaceOperandWithVariable(I: I0, OpIdx: OI))
        // We can't create a PHI from this GEP.
        return false;
      for (auto *I : Insts)
        PHIOperands[I].push_back(Elt: I->getOperand(i: OI));
    }
  }
  return true;
}
2074 | |
// Assuming canSinkInstructions(Blocks) has returned true, sink the last
// instruction of every block in Blocks to their common successor, commoning
// into one instruction.
static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(Idx: 0);

  // canSinkInstructions returning true guarantees that every block has at
  // least one non-terminator instruction.
  SmallVector<Instruction*,4> Insts;
  for (auto *BB : Blocks) {
    // Walk backwards from the terminator, skipping debug intrinsics, to find
    // the last "real" instruction of each block.
    Instruction *I = BB->getTerminator();
    do {
      I = I->getPrevNode();
    } while (isa<DbgInfoIntrinsic>(Val: I) && I != &BB->front());
    if (!isa<DbgInfoIntrinsic>(Val: I))
      Insts.push_back(Elt: I);
  }

  // The only checking we need to do now is that all users of all instructions
  // are the same PHI node. canSinkInstructions should have checked this but
  // it is slightly over-aggressive - it gets confused by commutative
  // instructions so double-check it here.
  Instruction *I0 = Insts.front();
  if (!I0->user_empty()) {
    auto *PNUse = dyn_cast<PHINode>(Val: *I0->user_begin());
    if (!all_of(Range&: Insts, P: [&PNUse](const Instruction *I) -> bool {
          auto *U = cast<Instruction>(Val: *I->user_begin());
          return U == PNUse;
        }))
      return false;
  }

  // We don't need to do any more checking here; canSinkInstructions should
  // have done it all for us.
  SmallVector<Value*, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    // This check is different to that in canSinkInstructions. There, we
    // cared about the global view once simplifycfg (and instcombine) have
    // completed - it takes into account PHIs that become trivially
    // simplifiable. However here we need a more local view; if an operand
    // differs we create a PHI and rely on instcombine to clean up the very
    // small mess we may make.
    bool NeedPHI = any_of(Range&: Insts, P: [&I0, O](const Instruction *I) {
      return I->getOperand(i: O) != I0->getOperand(i: O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(Elt: I0->getOperand(i: O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(i: O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!" );
    auto *PN =
        PHINode::Create(Ty: Op->getType(), NumReservedValues: Insts.size(), NameStr: Op->getName() + ".sink" );
    PN->insertBefore(InsertPos: BBEnd->begin());
    for (auto *I : Insts)
      PN->addIncoming(V: I->getOperand(i: O), BB: I->getParent());
    NewOperands.push_back(Elt: PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(i: O).set(NewOperands[O]);

  I0->moveBefore(BB&: *BBEnd, I: BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags, and merge debug locations.
  for (auto *I : Insts)
    if (I != I0) {
      // The debug location for the "common" instruction is the merged locations
      // of all the commoned instructions. We start with the original location
      // of the "common" instruction and iteratively merge each location in the
      // loop below.
      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
      // However, as N-way merge for CallInst is rare, so we use simplified API
      // instead of using complex API for N-way merge.
      I0->applyMergedLocation(LocA: I0->getDebugLoc(), LocB: I->getDebugLoc());
      combineMetadataForCSE(K: I0, J: I, DoesKMove: true);
      I0->andIRFlags(V: I);
    }

  if (!I0->user_empty()) {
    // canSinkLastInstruction checked that all instructions were used by
    // one and only one PHI node. Find that now, RAUW it to our common
    // instruction and nuke it.
    auto *PN = cast<PHINode>(Val: *I0->user_begin());
    PN->replaceAllUsesWith(V: I0);
    PN->eraseFromParent();
  }

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts) {
    if (I == I0)
      continue;
    // The remaining uses are debug users, replace those with the common inst.
    // In most (all?) cases this just introduces a use-before-def.
    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users" );
    I->replaceAllUsesWith(V: I0);
    I->eraseFromParent();
  }

  return true;
}
2180 | |
2181 | namespace { |
2182 | |
2183 | // LockstepReverseIterator - Iterates through instructions |
2184 | // in a set of blocks in reverse order from the first non-terminator. |
2185 | // For example (assume all blocks have size n): |
2186 | // LockstepReverseIterator I([B1, B2, B3]); |
2187 | // *I-- = [B1[n], B2[n], B3[n]]; |
2188 | // *I-- = [B1[n-1], B2[n-1], B3[n-1]]; |
2189 | // *I-- = [B1[n-2], B2[n-2], B3[n-2]]; |
2190 | // ... |
2191 | class LockstepReverseIterator { |
2192 | ArrayRef<BasicBlock*> Blocks; |
2193 | SmallVector<Instruction*,4> Insts; |
2194 | bool Fail; |
2195 | |
2196 | public: |
2197 | LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) { |
2198 | reset(); |
2199 | } |
2200 | |
2201 | void reset() { |
2202 | Fail = false; |
2203 | Insts.clear(); |
2204 | for (auto *BB : Blocks) { |
2205 | Instruction *Inst = BB->getTerminator(); |
2206 | for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Val: Inst);) |
2207 | Inst = Inst->getPrevNode(); |
2208 | if (!Inst) { |
2209 | // Block wasn't big enough. |
2210 | Fail = true; |
2211 | return; |
2212 | } |
2213 | Insts.push_back(Elt: Inst); |
2214 | } |
2215 | } |
2216 | |
2217 | bool isValid() const { |
2218 | return !Fail; |
2219 | } |
2220 | |
2221 | void operator--() { |
2222 | if (Fail) |
2223 | return; |
2224 | for (auto *&Inst : Insts) { |
2225 | for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Val: Inst);) |
2226 | Inst = Inst->getPrevNode(); |
2227 | // Already at beginning of block. |
2228 | if (!Inst) { |
2229 | Fail = true; |
2230 | return; |
2231 | } |
2232 | } |
2233 | } |
2234 | |
2235 | void operator++() { |
2236 | if (Fail) |
2237 | return; |
2238 | for (auto *&Inst : Insts) { |
2239 | for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Val: Inst);) |
2240 | Inst = Inst->getNextNode(); |
2241 | // Already at end of block. |
2242 | if (!Inst) { |
2243 | Fail = true; |
2244 | return; |
2245 | } |
2246 | } |
2247 | } |
2248 | |
2249 | ArrayRef<Instruction*> operator * () const { |
2250 | return Insts; |
2251 | } |
2252 | }; |
2253 | |
2254 | } // end anonymous namespace |
2255 | |
2256 | /// Check whether BB's predecessors end with unconditional branches. If it is |
2257 | /// true, sink any common code from the predecessors to BB. |
static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
                                           DomTreeUpdater *DTU) {
  // We support two situations:
  //   (1) all incoming arcs are unconditional
  //   (2) there are non-unconditional incoming arcs
  //
  // (2) is very common in switch defaults and
  // else-if patterns;
  //
  //   if (a) f(1);
  //   else if (b) f(2);
  //
  // produces:
  //
  //       [if]
  //      /    \
  //    [f(1)] [if]
  //      |     | \
  //      |     |  |
  //      |  [f(2)]|
  //       \    | /
  //        [ end ]
  //
  // [end] has two unconditional predecessor arcs and one conditional. The
  // conditional refers to the implicit empty 'else' arc. This conditional
  // arc can also be caused by an empty default block in a switch.
  //
  // In this case, we attempt to sink code from all *unconditional* arcs.
  // If we can sink instructions from these arcs (determined during the scan
  // phase below) we insert a common successor for all unconditional arcs and
  // connect that to [end], to enable sinking:
  //
  //       [if]
  //      /    \
  //    [x(1)] [if]
  //      |     | \
  //      |     |  \
  //      |  [x(2)] |
  //       \   /    |
  //   [sink.split] |
  //          \    /
  //         [ end ]
  //
  SmallVector<BasicBlock*,4> UnconditionalPreds;
  bool HaveNonUnconditionalPredecessors = false;
  for (auto *PredBB : predecessors(BB)) {
    auto *PredBr = dyn_cast<BranchInst>(Val: PredBB->getTerminator());
    if (PredBr && PredBr->isUnconditional())
      UnconditionalPreds.push_back(Elt: PredBB);
    else
      HaveNonUnconditionalPredecessors = true;
  }
  // Sinking only makes sense with at least two source blocks.
  if (UnconditionalPreds.size() < 2)
    return false;

  // We take a two-step approach to tail sinking. First we scan from the end of
  // each block upwards in lockstep. If the n'th instruction from the end of each
  // block can be sunk, those instructions are added to ValuesToSink and we
  // carry on. If we can sink an instruction but need to PHI-merge some operands
  // (because they're not identical in each instruction) we add these to
  // PHIOperands.
  // ScanIdx counts how many trailing lockstep "rows" of instructions are
  // legally sinkable (profitability is checked separately below).
  int ScanIdx = 0;
  SmallPtrSet<Value*,4> InstructionsToSink;
  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
  LockstepReverseIterator LRI(UnconditionalPreds);
  while (LRI.isValid() &&
         canSinkInstructions(Insts: *LRI, PHIOperands)) {
    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
                      << "\n" );
    InstructionsToSink.insert(I: (*LRI).begin(), E: (*LRI).end());
    ++ScanIdx;
    --LRI;
  }

  // If no instructions can be sunk, early-return.
  if (ScanIdx == 0)
    return false;

  // When BB leads to deopt or unreachable, the profitability analysis below
  // is skipped entirely - every legally sinkable instruction is sunk.
  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);

  if (!followedByDeoptOrUnreachable) {
    // Okay, we *could* sink last ScanIdx instructions. But how many can we
    // actually sink before encountering instruction that is unprofitable to
    // sink?
    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
      unsigned NumPHIdValues = 0;
      for (auto *I : *LRI)
        for (auto *V : PHIOperands[I]) {
          if (!InstructionsToSink.contains(Ptr: V))
            ++NumPHIdValues;
          // FIXME: this check is overly optimistic. We may end up not sinking
          // said instruction, due to the very same profitability check.
          // See @creating_too_many_phis in sink-common-code.ll.
        }
      LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n" );
      // Ceil-divide: each group of UnconditionalPreds.size() differing
      // operands costs one PHI node in the successor.
      unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
      if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
        NumPHIInsts++;

      return NumPHIInsts <= 1;
    };

    // We've determined that we are going to sink last ScanIdx instructions,
    // and recorded them in InstructionsToSink. Now, some instructions may be
    // unprofitable to sink. But that determination depends on the instructions
    // that we are going to sink.

    // First, forward scan: find the first instruction unprofitable to sink,
    // recording all the ones that are profitable to sink.
    // FIXME: would it be better, after we detect that not all are profitable.
    // to either record the profitable ones, or erase the unprofitable ones?
    // Maybe we need to choose (at runtime) the one that will touch least
    // instrs?
    LRI.reset();
    int Idx = 0;
    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
    while (Idx < ScanIdx) {
      if (!ProfitableToSinkInstruction(LRI)) {
        // Too many PHIs would be created.
        LLVM_DEBUG(
            dbgs() << "SINK: stopping here, too many PHIs would be created!\n" );
        break;
      }
      InstructionsProfitableToSink.insert(I: (*LRI).begin(), E: (*LRI).end());
      --LRI;
      ++Idx;
    }

    // If no instructions can be sunk, early-return.
    if (Idx == 0)
      return false;

    // Did we determine that (only) some instructions are unprofitable to sink?
    if (Idx < ScanIdx) {
      // Okay, some instructions are unprofitable.
      ScanIdx = Idx;
      InstructionsToSink = InstructionsProfitableToSink;

      // But, that may make other instructions unprofitable, too.
      // So, do a backward scan, do any earlier instructions become
      // unprofitable?
      assert(
          !ProfitableToSinkInstruction(LRI) &&
          "We already know that the last instruction is unprofitable to sink" );
      ++LRI;
      --Idx;
      while (Idx >= 0) {
        // If we detect that an instruction becomes unprofitable to sink,
        // all earlier instructions won't be sunk either,
        // so preemptively keep InstructionsProfitableToSink in sync.
        // FIXME: is this the most performant approach?
        for (auto *I : *LRI)
          InstructionsProfitableToSink.erase(Ptr: I);
        if (!ProfitableToSinkInstruction(LRI)) {
          // Everything starting with this instruction won't be sunk.
          ScanIdx = Idx;
          InstructionsToSink = InstructionsProfitableToSink;
        }
        ++LRI;
        --Idx;
      }
    }

    // If no instructions can be sunk, early-return.
    if (ScanIdx == 0)
      return false;
  }

  bool Changed = false;

  if (HaveNonUnconditionalPredecessors) {
    if (!followedByDeoptOrUnreachable) {
      // It is always legal to sink common instructions from unconditional
      // predecessors. However, if not all predecessors are unconditional,
      // this transformation might be pessimizing. So as a rule of thumb,
      // don't do it unless we'd sink at least one non-speculatable instruction.
      // See https://bugs.llvm.org/show_bug.cgi?id=30244
      LRI.reset();
      int Idx = 0;
      bool Profitable = false;
      while (Idx < ScanIdx) {
        if (!isSafeToSpeculativelyExecute(I: (*LRI)[0])) {
          Profitable = true;
          break;
        }
        --LRI;
        ++Idx;
      }
      if (!Profitable)
        return false;
    }

    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n" );
    // We have a conditional edge and we're going to sink some instructions.
    // Insert a new block postdominating all blocks we're going to sink from.
    if (!SplitBlockPredecessors(BB, Preds: UnconditionalPreds, Suffix: ".sink.split" , DTU))
      // Edges couldn't be split.
      return false;
    Changed = true;
  }

  // Now that we've analyzed all potential sinking candidates, perform the
  // actual sink. We iteratively sink the last non-terminator of the source
  // blocks into their common successor unless doing so would require too
  // many PHI instructions to be generated (currently only one PHI is allowed
  // per sunk instruction).
  //
  // We can use InstructionsToSink to discount values needing PHI-merging that will
  // actually be sunk in a later iteration. This allows us to be more
  // aggressive in what we sink. This does allow a false positive where we
  // sink presuming a later value will also be sunk, but stop half way through
  // and never actually sink it which means we produce more PHIs than intended.
  // This is unlikely in practice though.
  int SinkIdx = 0;
  for (; SinkIdx != ScanIdx; ++SinkIdx) {
    LLVM_DEBUG(dbgs() << "SINK: Sink: "
                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
                      << "\n" );

    // Because we've sunk every instruction in turn, the current instruction to
    // sink is always at index 0.
    LRI.reset();

    if (!sinkLastInstruction(Blocks: UnconditionalPreds)) {
      LLVM_DEBUG(
          dbgs()
          << "SINK: stopping here, failed to actually sink instruction!\n" );
      break;
    }

    NumSinkCommonInstrs++;
    Changed = true;
  }
  if (SinkIdx != 0)
    ++NumSinkCommonCode;
  return Changed;
}
2495 | |
2496 | namespace { |
2497 | |
struct CompatibleSets {
  using SetTy = SmallVector<InvokeInst *, 2>;

  // Disjoint groups of `invoke`s; every pair within one set satisfies
  // shouldBelongToSameSet() and can therefore be merged together.
  SmallVector<SetTy, 1> Sets;

  // Decide whether the two candidate `invoke`s (always exactly two)
  // may legally and profitably be merged into a single `invoke`.
  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);

  // Find the existing set compatible with \p II, or create a new empty one.
  SetTy &getCompatibleSet(InvokeInst *II);

  // Record \p II into whichever set it is compatible with.
  void insert(InvokeInst *II);
};
2509 | |
2510 | CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) { |
2511 | // Perform a linear scan over all the existing sets, see if the new `invoke` |
2512 | // is compatible with any particular set. Since we know that all the `invokes` |
2513 | // within a set are compatible, only check the first `invoke` in each set. |
2514 | // WARNING: at worst, this has quadratic complexity. |
2515 | for (CompatibleSets::SetTy &Set : Sets) { |
2516 | if (CompatibleSets::shouldBelongToSameSet(Invokes: {Set.front(), II})) |
2517 | return Set; |
2518 | } |
2519 | |
2520 | // Otherwise, we either had no sets yet, or this invoke forms a new set. |
2521 | return Sets.emplace_back(); |
2522 | } |
2523 | |
2524 | void CompatibleSets::insert(InvokeInst *II) { |
2525 | getCompatibleSet(II).emplace_back(Args&: II); |
2526 | } |
2527 | |
// Decide whether the two candidate `invoke`s may be merged into one. Both
// unwind to the same `landingpad` (grouping started from that block); they
// must additionally agree on callee kind, normal destination, incoming PHI
// values, and be identical apart from PHI-able data operands.
bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
  assert(Invokes.size() == 2 && "Always called with exactly two candidates." );

  // Can we theoretically merge these `invoke`s?
  auto IsIllegalToMerge = [](InvokeInst *II) {
    return II->cannotMerge() || II->isInlineAsm();
  };
  if (any_of(Range&: Invokes, P: IsIllegalToMerge))
    return false;

  // Either both `invoke`s must be direct,
  // or both `invoke`s must be indirect.
  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
  bool HaveIndirectCalls = any_of(Range&: Invokes, P: IsIndirectCall);
  bool AllCallsAreIndirect = all_of(Range&: Invokes, P: IsIndirectCall);
  if (HaveIndirectCalls) {
    if (!AllCallsAreIndirect)
      return false;
  } else {
    // All callees must be identical.
    Value *Callee = nullptr;
    for (InvokeInst *II : Invokes) {
      Value *CurrCallee = II->getCalledOperand();
      assert(CurrCallee && "There is always a called operand." );
      if (!Callee)
        Callee = CurrCallee;
      else if (Callee != CurrCallee)
        return false;
    }
  }

  // Either both `invoke`s must not have a normal destination,
  // or both `invoke`s must have a normal destination,
  auto HasNormalDest = [](InvokeInst *II) {
    return !isa<UnreachableInst>(Val: II->getNormalDest()->getFirstNonPHIOrDbg());
  };
  if (any_of(Range&: Invokes, P: HasNormalDest)) {
    // Do not merge `invoke` that does not have a normal destination with one
    // that does have a normal destination, even though doing so would be legal.
    if (!all_of(Range&: Invokes, P: HasNormalDest))
      return false;

    // All normal destinations must be identical.
    BasicBlock *NormalBB = nullptr;
    for (InvokeInst *II : Invokes) {
      BasicBlock *CurrNormalBB = II->getNormalDest();
      assert(CurrNormalBB && "There is always a 'continue to' basic block." );
      if (!NormalBB)
        NormalBB = CurrNormalBB;
      else if (NormalBB != CurrNormalBB)
        return false;
    }

    // In the normal destination, the incoming values for these two `invoke`s
    // must be compatible.
    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
    if (!IncomingValuesAreCompatible(
            BB: NormalBB, IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()},
            EquivalenceSet: &EquivalenceSet))
      return false;
  }

#ifndef NDEBUG
  // All unwind destinations must be identical.
  // We know that because we have started from said unwind destination.
  BasicBlock *UnwindBB = nullptr;
  for (InvokeInst *II : Invokes) {
    BasicBlock *CurrUnwindBB = II->getUnwindDest();
    assert(CurrUnwindBB && "There is always an 'unwind to' basic block." );
    if (!UnwindBB)
      UnwindBB = CurrUnwindBB;
    else
      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination." );
  }
#endif

  // In the unwind destination, the incoming values for these two `invoke`s
  // must be compatible.
  if (!IncomingValuesAreCompatible(
          BB: Invokes.front()->getUnwindDest(),
          IncomingBlocks: {Invokes[0]->getParent(), Invokes[1]->getParent()}))
    return false;

  // Ignoring arguments, these `invoke`s must be identical,
  // including operand bundles.
  const InvokeInst *II0 = Invokes.front();
  for (auto *II : Invokes.drop_front())
    if (!II->isSameOperationAs(I: II0))
      return false;

  // Can we theoretically form the data operands for the merged `invoke`?
  // Differing operands are acceptable only if they can be PHI'd together:
  // not tokens, and replaceable by a variable at that operand position.
  auto IsIllegalToMergeArguments = [](auto Ops) {
    Use &U0 = std::get<0>(Ops);
    Use &U1 = std::get<1>(Ops);
    if (U0 == U1)
      return false;
    return U0->getType()->isTokenTy() ||
           !canReplaceOperandWithVariable(I: cast<Instruction>(Val: U0.getUser()),
                                          OpIdx: U0.getOperandNo());
  };
  assert(Invokes.size() == 2 && "Always called with exactly two candidates." );
  if (any_of(Range: zip(t: Invokes[0]->data_ops(), u: Invokes[1]->data_ops()),
             P: IsIllegalToMergeArguments))
    return false;

  return true;
}
2635 | |
2636 | } // namespace |
2637 | |
2638 | // Merge all invokes in the provided set, all of which are compatible |
2639 | // as per the `CompatibleSets::shouldBelongToSameSet()`. |
static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
                                       DomTreeUpdater *DTU) {
  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge." );

  SmallVector<DominatorTree::UpdateType, 8> Updates;
  if (DTU)
    // Worst case: one insert per predecessor, up to two successor inserts,
    // and up to two deletes per original invoke block.
    Updates.reserve(N: 2 + 3 * Invokes.size());

  bool HasNormalDest =
      !isa<UnreachableInst>(Val: Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());

  // Clone one of the invokes into a new basic block.
  // Since they are all compatible, it doesn't matter which invoke is cloned.
  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
    InvokeInst *II0 = Invokes.front();
    BasicBlock *II0BB = II0->getParent();
    BasicBlock *InsertBeforeBlock =
        II0->getParent()->getIterator()->getNextNode();
    Function *Func = II0BB->getParent();
    LLVMContext &Ctx = II0->getContext();

    BasicBlock *MergedInvokeBB = BasicBlock::Create(
        Context&: Ctx, Name: II0BB->getName() + ".invoke" , Parent: Func, InsertBefore: InsertBeforeBlock);

    auto *MergedInvoke = cast<InvokeInst>(Val: II0->clone());
    // NOTE: all invokes have the same attributes, so no handling needed.
    MergedInvoke->insertInto(ParentBB: MergedInvokeBB, It: MergedInvokeBB->end());

    if (!HasNormalDest) {
      // This set does not have a normal destination,
      // so just form a new block with unreachable terminator.
      BasicBlock *MergedNormalDest = BasicBlock::Create(
          Context&: Ctx, Name: II0BB->getName() + ".cont" , Parent: Func, InsertBefore: InsertBeforeBlock);
      new UnreachableInst(Ctx, MergedNormalDest);
      MergedInvoke->setNormalDest(MergedNormalDest);
    }

    // The unwind destination, however, remains identical for all invokes here.

    return MergedInvoke;
  }();

  if (DTU) {
    // Predecessor blocks that contained these invokes will now branch to
    // the new block that contains the merged invoke, ...
    for (InvokeInst *II : Invokes)
      Updates.push_back(
          Elt: {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});

    // ... which has the new `unreachable` block as normal destination,
    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
    for (BasicBlock *SuccBBOfMergedInvoke : successors(I: MergedInvoke))
      Updates.push_back(Elt: {DominatorTree::Insert, MergedInvoke->getParent(),
                           SuccBBOfMergedInvoke});

    // Since predecessor blocks now unconditionally branch to a new block,
    // they no longer branch to their original successors.
    for (InvokeInst *II : Invokes)
      for (BasicBlock *SuccOfPredBB : successors(BB: II->getParent()))
        Updates.push_back(
            Elt: {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
  }

  bool IsIndirectCall = Invokes[0]->isIndirectCall();

  // Form the merged operands for the merged invoke.
  for (Use &U : MergedInvoke->operands()) {
    // Only PHI together the indirect callees and data operands.
    if (MergedInvoke->isCallee(U: &U)) {
      if (!IsIndirectCall)
        continue;
    } else if (!MergedInvoke->isDataOperand(U: &U))
      continue;

    // Don't create trivial PHI's with all-identical incoming values.
    bool NeedPHI = any_of(Range&: Invokes, P: [&U](InvokeInst *II) {
      return II->getOperand(i_nocapture: U.getOperandNo()) != U.get();
    });
    if (!NeedPHI)
      continue;

    // Form a PHI out of all the data ops under this index.
    PHINode *PN = PHINode::Create(
        Ty: U->getType(), /*NumReservedValues=*/Invokes.size(), NameStr: "" , InsertBefore: MergedInvoke->getIterator());
    for (InvokeInst *II : Invokes)
      PN->addIncoming(V: II->getOperand(i_nocapture: U.getOperandNo()), BB: II->getParent());

    U.set(PN);
  }

  // We've ensured that each PHI node has compatible (identical) incoming values
  // when coming from each of the `invoke`s in the current merge set,
  // so update the PHI nodes accordingly.
  for (BasicBlock *Succ : successors(I: MergedInvoke))
    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
                          /*ExistPred=*/Invokes.front()->getParent());

  // And finally, replace the original `invoke`s with an unconditional branch
  // to the block with the merged `invoke`. Also, give that merged `invoke`
  // the merged debugloc of all the original `invoke`s.
  DILocation *MergedDebugLoc = nullptr;
  for (InvokeInst *II : Invokes) {
    // Compute the debug location common to all the original `invoke`s.
    if (!MergedDebugLoc)
      MergedDebugLoc = II->getDebugLoc();
    else
      MergedDebugLoc =
          DILocation::getMergedLocation(LocA: MergedDebugLoc, LocB: II->getDebugLoc());

    // And replace the old `invoke` with an unconditional branch
    // to the block with the merged `invoke`.
    for (BasicBlock *OrigSuccBB : successors(BB: II->getParent()))
      OrigSuccBB->removePredecessor(Pred: II->getParent());
    BranchInst::Create(IfTrue: MergedInvoke->getParent(), InsertAtEnd: II->getParent());
    II->replaceAllUsesWith(V: MergedInvoke);
    II->eraseFromParent();
    ++NumInvokesMerged;
  }
  MergedInvoke->setDebugLoc(MergedDebugLoc);
  ++NumInvokeSetsFormed;

  if (DTU)
    DTU->applyUpdates(Updates);
}
2764 | |
2765 | /// If this block is a `landingpad` exception handling block, categorize all |
2766 | /// the predecessor `invoke`s into sets, with all `invoke`s in each set |
2767 | /// being "mergeable" together, and then merge invokes in each set together. |
2768 | /// |
2769 | /// This is a weird mix of hoisting and sinking. Visually, it goes from: |
2770 | /// [...] [...] |
2771 | /// | | |
2772 | /// [invoke0] [invoke1] |
2773 | /// / \ / \ |
2774 | /// [cont0] [landingpad] [cont1] |
2775 | /// to: |
2776 | /// [...] [...] |
2777 | /// \ / |
2778 | /// [invoke] |
2779 | /// / \ |
2780 | /// [cont] [landingpad] |
2781 | /// |
2782 | /// But of course we can only do that if the invokes share the `landingpad`, |
2783 | /// edges invoke0->cont0 and invoke1->cont1 are "compatible", |
2784 | /// and the invoked functions are "compatible". |
2785 | static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) { |
2786 | if (!EnableMergeCompatibleInvokes) |
2787 | return false; |
2788 | |
2789 | bool Changed = false; |
2790 | |
2791 | // FIXME: generalize to all exception handling blocks? |
2792 | if (!BB->isLandingPad()) |
2793 | return Changed; |
2794 | |
2795 | CompatibleSets Grouper; |
2796 | |
2797 | // Record all the predecessors of this `landingpad`. As per verifier, |
2798 | // the only allowed predecessor is the unwind edge of an `invoke`. |
2799 | // We want to group "compatible" `invokes` into the same set to be merged. |
2800 | for (BasicBlock *PredBB : predecessors(BB)) |
2801 | Grouper.insert(II: cast<InvokeInst>(Val: PredBB->getTerminator())); |
2802 | |
2803 | // And now, merge `invoke`s that were grouped togeter. |
2804 | for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) { |
2805 | if (Invokes.size() < 2) |
2806 | continue; |
2807 | Changed = true; |
2808 | MergeCompatibleInvokesImpl(Invokes, DTU); |
2809 | } |
2810 | |
2811 | return Changed; |
2812 | } |
2813 | |
2814 | namespace { |
2815 | /// Track ephemeral values, which should be ignored for cost-modelling |
2816 | /// purposes. Requires walking instructions in reverse order. |
2817 | class EphemeralValueTracker { |
2818 | SmallPtrSet<const Instruction *, 32> EphValues; |
2819 | |
2820 | bool isEphemeral(const Instruction *I) { |
2821 | if (isa<AssumeInst>(Val: I)) |
2822 | return true; |
2823 | return !I->mayHaveSideEffects() && !I->isTerminator() && |
2824 | all_of(Range: I->users(), P: [&](const User *U) { |
2825 | return EphValues.count(Ptr: cast<Instruction>(Val: U)); |
2826 | }); |
2827 | } |
2828 | |
2829 | public: |
2830 | bool track(const Instruction *I) { |
2831 | if (isEphemeral(I)) { |
2832 | EphValues.insert(Ptr: I); |
2833 | return true; |
2834 | } |
2835 | return false; |
2836 | } |
2837 | |
2838 | bool contains(const Instruction *I) const { return EphValues.contains(Ptr: I); } |
2839 | }; |
2840 | } // namespace |
2841 | |
/// Determine if we can hoist or sink a sole store instruction out of a
/// conditional block.
2844 | /// |
2845 | /// We are looking for code like the following: |
2846 | /// BrBB: |
2847 | /// store i32 %add, i32* %arrayidx2 |
2848 | /// ... // No other stores or function calls (we could be calling a memory |
2849 | /// ... // function). |
2850 | /// %cmp = icmp ult %x, %y |
2851 | /// br i1 %cmp, label %EndBB, label %ThenBB |
2852 | /// ThenBB: |
2853 | /// store i32 %add5, i32* %arrayidx2 |
2854 | /// br label EndBB |
2855 | /// EndBB: |
2856 | /// ... |
2857 | /// We are going to transform this into: |
2858 | /// BrBB: |
2859 | /// store i32 %add, i32* %arrayidx2 |
2860 | /// ... // |
2861 | /// %cmp = icmp ult %x, %y |
2862 | /// %add.add5 = select i1 %cmp, i32 %add, %add5 |
2863 | /// store i32 %add.add5, i32* %arrayidx2 |
2864 | /// ... |
2865 | /// |
/// \return The pointer to the value of the previous store if the store can be
///         hoisted into the predecessor block. nullptr otherwise.
static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
  // Only plain stores are candidates for this transform.
  StoreInst *StoreToHoist = dyn_cast<StoreInst>(Val: I);
  if (!StoreToHoist)
    return nullptr;

  // Volatile or atomic.
  if (!StoreToHoist->isSimple())
    return nullptr;

  Value *StorePtr = StoreToHoist->getPointerOperand();
  Type *StoreTy = StoreToHoist->getValueOperand()->getType();

  // Look for a store to the same pointer in BrBB.
  // The scan is capped at a small window to bound compile time.
  unsigned MaxNumInstToLookAt = 9;
  // Skip pseudo probe intrinsic calls which are not really killing any memory
  // accesses.
  for (Instruction &CurI : reverse(C: BrBB->instructionsWithoutDebug(SkipPseudoOp: true))) {
    if (!MaxNumInstToLookAt)
      break;
    --MaxNumInstToLookAt;

    // Could be calling an instruction that affects memory like free().
    if (CurI.mayWriteToMemory() && !isa<StoreInst>(Val: CurI))
      return nullptr;

    if (auto *SI = dyn_cast<StoreInst>(Val: &CurI)) {
      // Found the previous store to same location and type. Make sure it is
      // simple, to avoid introducing a spurious non-atomic write after an
      // atomic write.
      if (SI->getPointerOperand() == StorePtr &&
          SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
          SI->getAlign() >= StoreToHoist->getAlign())
        // Found the previous store, return its value operand.
        return SI->getValueOperand();
      // Any other store ends the scan: we cannot see past an unknown write.
      return nullptr; // Unknown store.
    }

    if (auto *LI = dyn_cast<LoadInst>(Val: &CurI)) {
      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
          LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
        // Local objects (created by an `alloca` instruction) are always
        // writable, so once we are past a read from a location it is valid to
        // also write to that same location.
        // If the address of the local object never escapes the function, that
        // means it's never concurrently read or written, hence moving the store
        // from under the condition will not introduce a data race.
        auto *AI = dyn_cast<AllocaInst>(Val: getUnderlyingObject(V: StorePtr));
        if (AI && !PointerMayBeCaptured(V: AI, ReturnCaptures: false, StoreCaptures: true))
          // Found a previous load, return it.
          return LI;
      }
      // The load didn't work out, but we may still find a store.
    }
  }

  // Scan window exhausted without finding a matching store or load.
  return nullptr;
}
2926 | |
2927 | /// Estimate the cost of the insertion(s) and check that the PHI nodes can be |
2928 | /// converted to selects. |
2929 | static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, |
2930 | BasicBlock *EndBB, |
2931 | unsigned &SpeculatedInstructions, |
2932 | InstructionCost &Cost, |
2933 | const TargetTransformInfo &TTI) { |
2934 | TargetTransformInfo::TargetCostKind CostKind = |
2935 | BB->getParent()->hasMinSize() |
2936 | ? TargetTransformInfo::TCK_CodeSize |
2937 | : TargetTransformInfo::TCK_SizeAndLatency; |
2938 | |
2939 | bool HaveRewritablePHIs = false; |
2940 | for (PHINode &PN : EndBB->phis()) { |
2941 | Value *OrigV = PN.getIncomingValueForBlock(BB); |
2942 | Value *ThenV = PN.getIncomingValueForBlock(BB: ThenBB); |
2943 | |
2944 | // FIXME: Try to remove some of the duplication with |
2945 | // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial. |
2946 | if (ThenV == OrigV) |
2947 | continue; |
2948 | |
2949 | Cost += TTI.getCmpSelInstrCost(Opcode: Instruction::Select, ValTy: PN.getType(), CondTy: nullptr, |
2950 | VecPred: CmpInst::BAD_ICMP_PREDICATE, CostKind); |
2951 | |
2952 | // Don't convert to selects if we could remove undefined behavior instead. |
2953 | if (passingValueIsAlwaysUndefined(V: OrigV, I: &PN) || |
2954 | passingValueIsAlwaysUndefined(V: ThenV, I: &PN)) |
2955 | return false; |
2956 | |
2957 | HaveRewritablePHIs = true; |
2958 | ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(Val: OrigV); |
2959 | ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(Val: ThenV); |
2960 | if (!OrigCE && !ThenCE) |
2961 | continue; // Known cheap (FIXME: Maybe not true for aggregates). |
2962 | |
2963 | InstructionCost OrigCost = OrigCE ? computeSpeculationCost(I: OrigCE, TTI) : 0; |
2964 | InstructionCost ThenCost = ThenCE ? computeSpeculationCost(I: ThenCE, TTI) : 0; |
2965 | InstructionCost MaxCost = |
2966 | 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; |
2967 | if (OrigCost + ThenCost > MaxCost) |
2968 | return false; |
2969 | |
2970 | // Account for the cost of an unfolded ConstantExpr which could end up |
2971 | // getting expanded into Instructions. |
2972 | // FIXME: This doesn't account for how many operations are combined in the |
2973 | // constant expression. |
2974 | ++SpeculatedInstructions; |
2975 | if (SpeculatedInstructions > 1) |
2976 | return false; |
2977 | } |
2978 | |
2979 | return HaveRewritablePHIs; |
2980 | } |
2981 | |
2982 | /// Speculate a conditional basic block flattening the CFG. |
2983 | /// |
2984 | /// Note that this is a very risky transform currently. Speculating |
2985 | /// instructions like this is most often not desirable. Instead, there is an MI |
2986 | /// pass which can do it with full awareness of the resource constraints. |
2987 | /// However, some cases are "obvious" and we should do directly. An example of |
2988 | /// this is speculating a single, reasonably cheap instruction. |
2989 | /// |
2990 | /// There is only one distinct advantage to flattening the CFG at the IR level: |
2991 | /// it makes very common but simplistic optimizations such as are common in |
2992 | /// instcombine and the DAG combiner more powerful by removing CFG edges and |
2993 | /// modeling their effects with easier to reason about SSA value graphs. |
2994 | /// |
2995 | /// |
2996 | /// An illustration of this transform is turning this IR: |
2997 | /// \code |
2998 | /// BB: |
2999 | /// %cmp = icmp ult %x, %y |
3000 | /// br i1 %cmp, label %EndBB, label %ThenBB |
3001 | /// ThenBB: |
3002 | /// %sub = sub %x, %y |
3003 | /// br label BB2 |
3004 | /// EndBB: |
3005 | /// %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ] |
3006 | /// ... |
3007 | /// \endcode |
3008 | /// |
3009 | /// Into this IR: |
3010 | /// \code |
3011 | /// BB: |
3012 | /// %cmp = icmp ult %x, %y |
3013 | /// %sub = sub %x, %y |
3014 | /// %cond = select i1 %cmp, 0, %sub |
3015 | /// ... |
3016 | /// \endcode |
3017 | /// |
3018 | /// \returns true if the conditional block is removed. |
bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
                                            BasicBlock *ThenBB) {
  if (!Options.SpeculateBlocks)
    return false;

  // Be conservative for now. FP select instruction can often be expensive.
  Value *BrCond = BI->getCondition();
  if (isa<FCmpInst>(Val: BrCond))
    return false;

  BasicBlock *BB = BI->getParent();
  // ThenBB's sole successor is the join block; the assert on EndBB below
  // checks it is also a direct successor of the branch.
  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(Idx: 0);
  InstructionCost Budget =
      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  // If ThenBB is actually on the false edge of the conditional branch, remember
  // to swap the select operands later.
  bool Invert = false;
  if (ThenBB != BI->getSuccessor(i: 0)) {
    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
    Invert = true;
  }
  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");

  // If the branch is non-unpredictable, and is predicted to *not* branch to
  // the `then` block, then avoid speculating it.
  if (!BI->getMetadata(KindID: LLVMContext::MD_unpredictable)) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(I: *BI, TrueVal&: TWeight, FalseVal&: FWeight) &&
        (TWeight + FWeight) != 0) {
      // Probability of jumping straight to EndBB, i.e. of *not* executing
      // ThenBB.
      uint64_t EndWeight = Invert ? TWeight : FWeight;
      BranchProbability BIEndProb =
          BranchProbability::getBranchProbability(Numerator: EndWeight, Denominator: TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      if (BIEndProb >= Likely)
        return false;
    }
  }

  // Keep a count of how many times instructions are used within ThenBB when
  // they are candidates for sinking into ThenBB. Specifically:
  // - They are defined in BB, and
  // - They have no side effects, and
  // - All of their uses are in ThenBB.
  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;

  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;

  unsigned SpeculatedInstructions = 0;
  Value *SpeculatedStoreValue = nullptr;
  StoreInst *SpeculatedStore = nullptr;
  EphemeralValueTracker EphTracker;
  // Walk ThenBB bottom-up, excluding its terminator, so that values feeding
  // only assumes are recognized as ephemeral before their uses are visited.
  for (Instruction &I : reverse(C: drop_end(RangeOrContainer&: *ThenBB))) {
    // Skip debug info.
    if (isa<DbgInfoIntrinsic>(Val: I)) {
      SpeculatedDbgIntrinsics.push_back(Elt: &I);
      continue;
    }

    // Skip pseudo probes. The consequence is we lose track of the branch
    // probability for ThenBB, which is fine since the optimization here takes
    // place regardless of the branch probability.
    if (isa<PseudoProbeInst>(Val: I)) {
      // The probe should be deleted so that it will not be over-counted when
      // the samples collected on the non-conditional path are counted towards
      // the conditional path. We leave it for the counts inference algorithm to
      // figure out a proper count for an unknown probe.
      SpeculatedDbgIntrinsics.push_back(Elt: &I);
      continue;
    }

    // Ignore ephemeral values, they will be dropped by the transform.
    if (EphTracker.track(I: &I))
      continue;

    // Only speculatively execute a single instruction (not counting the
    // terminator) for now.
    ++SpeculatedInstructions;
    if (SpeculatedInstructions > 1)
      return false;

    // Don't hoist the instruction if it's unsafe or expensive.
    if (!isSafeToSpeculativelyExecute(I: &I) &&
        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
                                  I: &I, BrBB: BB, StoreBB: ThenBB, EndBB))))
      return false;
    if (!SpeculatedStoreValue &&
        computeSpeculationCost(I: &I, TTI) >
            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
      return false;

    // Store the store speculation candidate.
    if (SpeculatedStoreValue)
      SpeculatedStore = cast<StoreInst>(Val: &I);

    // Do not hoist the instruction if any of its operands are defined but not
    // used in BB. The transformation will prevent the operand from
    // being sunk into the use block.
    for (Use &Op : I.operands()) {
      Instruction *OpI = dyn_cast<Instruction>(Val&: Op);
      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
        continue; // Not a candidate for sinking.

      ++SinkCandidateUseCounts[OpI];
    }
  }

  // Consider any sink candidates which are only used in ThenBB as costs for
  // speculation. Note, while we iterate over a DenseMap here, we are summing
  // and so iteration order isn't significant.
  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
    if (Inst->hasNUses(N: Count)) {
      ++SpeculatedInstructions;
      if (SpeculatedInstructions > 1)
        return false;
    }

  // Check that we can insert the selects and that it's not too expensive to do
  // so.
  bool Convert = SpeculatedStore != nullptr;
  InstructionCost Cost = 0;
  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
                                            SpeculatedInstructions,
                                            Cost, TTI);
  if (!Convert || Cost > Budget)
    return false;

  // If we get here, we can hoist the instruction and if-convert.
  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);

  // Insert a select of the value of the speculated store.
  if (SpeculatedStoreValue) {
    IRBuilder<NoFolder> Builder(BI);
    Value *OrigV = SpeculatedStore->getValueOperand();
    Value *TrueV = SpeculatedStore->getValueOperand();
    Value *FalseV = SpeculatedStoreValue;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *S = Builder.CreateSelect(
        C: BrCond, True: TrueV, False: FalseV, Name: "spec.store.select", MDFrom: BI);
    SpeculatedStore->setOperand(i_nocapture: 0, Val_nocapture: S);
    SpeculatedStore->applyMergedLocation(LocA: BI->getDebugLoc(),
                                         LocB: SpeculatedStore->getDebugLoc());
    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
    // intrinsics are tracking the new stored value (the result of the
    // select). If we don't, and the store were to be removed by another pass
    // (e.g. DSE), then we'd eventually end up emitting a location describing
    // the conditional value, unconditionally.
    //
    // === Before this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1, ...
    //   br %cond if.then
    //
    // if.then:
    //   store %two, %x.dest, !DIAssignID !2
    //   dbg.assign %two, "x", ..., !2, ...
    //
    // === After this transformation ===
    // pred:
    //   store %one, %x.dest, !DIAssignID !1
    //   dbg.assign %one, "x", ..., !1
    //   ...
    //   %merge = select %cond, %two, %one
    //   store %merge, %x.dest, !DIAssignID !2
    //   dbg.assign %merge, "x", ..., !2
    auto replaceVariable = [OrigV, S](auto *DbgAssign) {
      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
        DbgAssign->replaceVariableLocationOp(OrigV, S);
    };
    for_each(Range: at::getAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
    for_each(Range: at::getDVRAssignmentMarkers(Inst: SpeculatedStore), F: replaceVariable);
  }

  // Metadata can be dependent on the condition we are hoisting above.
  // Strip all UB-implying metadata on the instruction. Drop the debug loc
  // to avoid making it appear as if the condition is a constant, which would
  // be misleading while debugging.
  // Similarly strip attributes that maybe dependent on condition we are
  // hoisting above.
  for (auto &I : make_early_inc_range(Range&: *ThenBB)) {
    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
      // Don't update the DILocation of dbg.assign intrinsics.
      if (!isa<DbgAssignIntrinsic>(Val: &I))
        I.setDebugLoc(DebugLoc());
    }
    I.dropUBImplyingAttrsAndMetadata();

    // Drop ephemeral values.
    if (EphTracker.contains(I: &I)) {
      I.replaceAllUsesWith(V: PoisonValue::get(T: I.getType()));
      I.eraseFromParent();
    }
  }

  // Hoist the instructions.
  // In "RemoveDIs" non-instr debug-info mode, drop DbgVariableRecords attached
  // to these instructions, in the same way that dbg.value intrinsics are
  // dropped at the end of this block.
  for (auto &It : make_range(x: ThenBB->begin(), y: ThenBB->end()))
    for (DbgRecord &DR : make_early_inc_range(Range: It.getDbgRecordRange()))
      // Drop all records except assign-kind DbgVariableRecords (dbg.assign
      // equivalent).
      if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(Val: &DR);
          !DVR || !DVR->isDbgAssign())
        It.dropOneDbgRecord(I: &DR);
  // Move everything except ThenBB's terminator into BB, directly in front of
  // the branch.
  BB->splice(ToIt: BI->getIterator(), FromBB: ThenBB, FromBeginIt: ThenBB->begin(),
             FromEndIt: std::prev(x: ThenBB->end()));

  // Insert selects and rewrite the PHI operands.
  IRBuilder<NoFolder> Builder(BI);
  for (PHINode &PN : EndBB->phis()) {
    unsigned OrigI = PN.getBasicBlockIndex(BB);
    unsigned ThenI = PN.getBasicBlockIndex(BB: ThenBB);
    Value *OrigV = PN.getIncomingValue(i: OrigI);
    Value *ThenV = PN.getIncomingValue(i: ThenI);

    // Skip PHIs which are trivial.
    if (OrigV == ThenV)
      continue;

    // Create a select whose true value is the speculatively executed value and
    // false value is the pre-existing value. Swap them if the branch
    // destinations were inverted.
    Value *TrueV = ThenV, *FalseV = OrigV;
    if (Invert)
      std::swap(a&: TrueV, b&: FalseV);
    Value *V = Builder.CreateSelect(C: BrCond, True: TrueV, False: FalseV, Name: "spec.select", MDFrom: BI);
    PN.setIncomingValue(i: OrigI, V);
    PN.setIncomingValue(i: ThenI, V);
  }

  // Remove speculated dbg intrinsics.
  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
  // dbg value for the different flows and inserting it after the select.
  for (Instruction *I : SpeculatedDbgIntrinsics) {
    // We still want to know that an assignment took place so don't remove
    // dbg.assign intrinsics.
    if (!isa<DbgAssignIntrinsic>(Val: I))
      I->eraseFromParent();
  }

  ++NumSpeculations;
  return true;
}
3266 | |
3267 | /// Return true if we can thread a branch across this block. |
3268 | static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { |
3269 | int Size = 0; |
3270 | EphemeralValueTracker EphTracker; |
3271 | |
3272 | // Walk the loop in reverse so that we can identify ephemeral values properly |
3273 | // (values only feeding assumes). |
3274 | for (Instruction &I : reverse(C: BB->instructionsWithoutDebug(SkipPseudoOp: false))) { |
3275 | // Can't fold blocks that contain noduplicate or convergent calls. |
3276 | if (CallInst *CI = dyn_cast<CallInst>(Val: &I)) |
3277 | if (CI->cannotDuplicate() || CI->isConvergent()) |
3278 | return false; |
3279 | |
3280 | // Ignore ephemeral values which are deleted during codegen. |
3281 | // We will delete Phis while threading, so Phis should not be accounted in |
3282 | // block's size. |
3283 | if (!EphTracker.track(I: &I) && !isa<PHINode>(Val: I)) { |
3284 | if (Size++ > MaxSmallBlockSize) |
3285 | return false; // Don't clone large BB's. |
3286 | } |
3287 | |
3288 | // We can only support instructions that do not define values that are |
3289 | // live outside of the current basic block. |
3290 | for (User *U : I.users()) { |
3291 | Instruction *UI = cast<Instruction>(Val: U); |
3292 | if (UI->getParent() != BB || isa<PHINode>(Val: UI)) |
3293 | return false; |
3294 | } |
3295 | |
3296 | // Looks ok, continue checking. |
3297 | } |
3298 | |
3299 | return true; |
3300 | } |
3301 | |
3302 | static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From, |
3303 | BasicBlock *To) { |
3304 | // Don't look past the block defining the value, we might get the value from |
3305 | // a previous loop iteration. |
3306 | auto *I = dyn_cast<Instruction>(Val: V); |
3307 | if (I && I->getParent() == To) |
3308 | return nullptr; |
3309 | |
3310 | // We know the value if the From block branches on it. |
3311 | auto *BI = dyn_cast<BranchInst>(Val: From->getTerminator()); |
3312 | if (BI && BI->isConditional() && BI->getCondition() == V && |
3313 | BI->getSuccessor(i: 0) != BI->getSuccessor(i: 1)) |
3314 | return BI->getSuccessor(i: 0) == To ? ConstantInt::getTrue(Context&: BI->getContext()) |
3315 | : ConstantInt::getFalse(Context&: BI->getContext()); |
3316 | |
3317 | return nullptr; |
3318 | } |
3319 | |
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// \returns false if nothing could be threaded, true if the CFG was changed,
/// and std::nullopt if an edge was threaded and the caller should re-run this
/// transform to simplify any remaining known values.
static std::optional<bool>
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Val: Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(BB: PN->getParent());
      return true;
    }

    // Each constant incoming value of the PHI gives the known condition value
    // on the edge from the corresponding incoming block.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(Val&: U))
        KnownValues[CB].insert(X: PN->getIncomingBlock(U));
  } else {
    // Otherwise, derive the condition value from conditional branches in the
    // predecessor blocks themselves.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(V: Cond, From: Pred, To: BB))
        KnownValues[CB].insert(X: Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and values defined in the block are
  // not used outside of it.
  if (!BlockIsSimpleEnoughToThreadThrough(BB))
    return false;

  for (const auto &Pair : KnownValues) {
    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true condition selects successor 0, a false one successor 1.
    BasicBlock *RealDest = BI->getSuccessor(i: !CB->getZExtValue());

    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(Range&: PredBBs, P: [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(Val: PredBB->getTerminator());
        }))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << " " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, Preds: PredBBs, Suffix: ".critedge", DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(MovePos: RealDest);

    // Update PHI nodes.
    AddPredecessorToBlock(Succ: RealDest, NewPred: EdgeBB, ExistPred: BB);

    // BB may have instructions that are being threaded over. Clone these
    // instructions into EdgeBB. We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DbgVariableRecords from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      // PHIs are not cloned; on this edge they resolve to the incoming value.
      if (PHINode *PN = dyn_cast<PHINode>(Val&: BBI)) {
        TranslateMap[PN] = PN->getIncomingValueForBlock(BB: EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(ParentBB: EdgeBB, It: InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      for (Use &Op : N->operands()) {
        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Val: Op);
        if (PI != TranslateMap.end())
          Op = PI->second;
      }

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(I: N, Q: {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(From: &*SrcDbgCursor);
        SrcDbgCursor = std::next(x: BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(From: &*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(Val: N))
          if (AC)
            AC->registerAssumption(CI: Assume);
      }
    }

    // Copy over any remaining debug-info (up to and including BB's branch)
    // onto the first insertion point in EdgeBB.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(From: &*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(From: BI);

    // The threaded predecessors no longer go through BB; send EdgeBB straight
    // to the destination we resolved for this condition value.
    BB->removePredecessor(Pred: EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(Val: EdgeBB->getTerminator());
    EdgeBI->setSuccessor(idx: 0, NewSucc: RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back(Elt: {DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back(Elt: {DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(BB: EdgeBB, DTU);

    // Signal repeat, simplifying any other constants.
    return std::nullopt;
  }

  return false;
}
3480 | |
3481 | static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI, |
3482 | DomTreeUpdater *DTU, |
3483 | const DataLayout &DL, |
3484 | AssumptionCache *AC) { |
3485 | std::optional<bool> Result; |
3486 | bool EverChanged = false; |
3487 | do { |
3488 | // Note that None means "we changed things, but recurse further." |
3489 | Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC); |
3490 | EverChanged |= Result == std::nullopt || *Result; |
3491 | } while (Result == std::nullopt); |
3492 | return EverChanged; |
3493 | } |
3494 | |
3495 | /// Given a BB that starts with the specified two-entry PHI node, |
3496 | /// see if we can eliminate it. |
3497 | static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, |
3498 | DomTreeUpdater *DTU, const DataLayout &DL) { |
3499 | // Ok, this is a two entry PHI node. Check to see if this is a simple "if |
3500 | // statement", which has a very simple dominance structure. Basically, we |
3501 | // are trying to find the condition that is being branched on, which |
3502 | // subsequently causes this merge to happen. We really want control |
3503 | // dependence information for this check, but simplifycfg can't keep it up |
3504 | // to date, and this catches most of the cases we care about anyway. |
3505 | BasicBlock *BB = PN->getParent(); |
3506 | |
3507 | BasicBlock *IfTrue, *IfFalse; |
3508 | BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse); |
3509 | if (!DomBI) |
3510 | return false; |
3511 | Value *IfCond = DomBI->getCondition(); |
3512 | // Don't bother if the branch will be constant folded trivially. |
3513 | if (isa<ConstantInt>(Val: IfCond)) |
3514 | return false; |
3515 | |
3516 | BasicBlock *DomBlock = DomBI->getParent(); |
3517 | SmallVector<BasicBlock *, 2> IfBlocks; |
3518 | llvm::copy_if( |
3519 | Range: PN->blocks(), Out: std::back_inserter(x&: IfBlocks), P: [](BasicBlock *IfBlock) { |
3520 | return cast<BranchInst>(Val: IfBlock->getTerminator())->isUnconditional(); |
3521 | }); |
3522 | assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) && |
3523 | "Will have either one or two blocks to speculate." ); |
3524 | |
3525 | // If the branch is non-unpredictable, see if we either predictably jump to |
3526 | // the merge bb (if we have only a single 'then' block), or if we predictably |
3527 | // jump to one specific 'then' block (if we have two of them). |
3528 | // It isn't beneficial to speculatively execute the code |
3529 | // from the block that we know is predictably not entered. |
3530 | if (!DomBI->getMetadata(KindID: LLVMContext::MD_unpredictable)) { |
3531 | uint64_t TWeight, FWeight; |
3532 | if (extractBranchWeights(I: *DomBI, TrueVal&: TWeight, FalseVal&: FWeight) && |
3533 | (TWeight + FWeight) != 0) { |
3534 | BranchProbability BITrueProb = |
3535 | BranchProbability::getBranchProbability(Numerator: TWeight, Denominator: TWeight + FWeight); |
3536 | BranchProbability Likely = TTI.getPredictableBranchThreshold(); |
3537 | BranchProbability BIFalseProb = BITrueProb.getCompl(); |
3538 | if (IfBlocks.size() == 1) { |
3539 | BranchProbability BIBBProb = |
3540 | DomBI->getSuccessor(i: 0) == BB ? BITrueProb : BIFalseProb; |
3541 | if (BIBBProb >= Likely) |
3542 | return false; |
3543 | } else { |
3544 | if (BITrueProb >= Likely || BIFalseProb >= Likely) |
3545 | return false; |
3546 | } |
3547 | } |
3548 | } |
3549 | |
3550 | // Don't try to fold an unreachable block. For example, the phi node itself |
3551 | // can't be the candidate if-condition for a select that we want to form. |
3552 | if (auto *IfCondPhiInst = dyn_cast<PHINode>(Val: IfCond)) |
3553 | if (IfCondPhiInst->getParent() == BB) |
3554 | return false; |
3555 | |
3556 | // Okay, we found that we can merge this two-entry phi node into a select. |
3557 | // Doing so would require us to fold *all* two entry phi nodes in this block. |
3558 | // At some point this becomes non-profitable (particularly if the target |
3559 | // doesn't support cmov's). Only do this transformation if there are two or |
3560 | // fewer PHI nodes in this block. |
3561 | unsigned NumPhis = 0; |
3562 | for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(Val: I); ++NumPhis, ++I) |
3563 | if (NumPhis > 2) |
3564 | return false; |
3565 | |
3566 | // Loop over the PHI's seeing if we can promote them all to select |
3567 | // instructions. While we are at it, keep track of the instructions |
3568 | // that need to be moved to the dominating block. |
3569 | SmallPtrSet<Instruction *, 4> AggressiveInsts; |
3570 | InstructionCost Cost = 0; |
3571 | InstructionCost Budget = |
3572 | TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; |
3573 | |
3574 | bool Changed = false; |
3575 | for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(Val: II);) { |
3576 | PHINode *PN = cast<PHINode>(Val: II++); |
3577 | if (Value *V = simplifyInstruction(I: PN, Q: {DL, PN})) { |
3578 | PN->replaceAllUsesWith(V); |
3579 | PN->eraseFromParent(); |
3580 | Changed = true; |
3581 | continue; |
3582 | } |
3583 | |
3584 | if (!dominatesMergePoint(V: PN->getIncomingValue(i: 0), BB, AggressiveInsts, |
3585 | Cost, Budget, TTI) || |
3586 | !dominatesMergePoint(V: PN->getIncomingValue(i: 1), BB, AggressiveInsts, |
3587 | Cost, Budget, TTI)) |
3588 | return Changed; |
3589 | } |
3590 | |
3591 | // If we folded the first phi, PN dangles at this point. Refresh it. If |
3592 | // we ran out of PHIs then we simplified them all. |
3593 | PN = dyn_cast<PHINode>(Val: BB->begin()); |
3594 | if (!PN) |
3595 | return true; |
3596 | |
3597 | // Return true if at least one of these is a 'not', and another is either |
3598 | // a 'not' too, or a constant. |
3599 | auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) { |
3600 | if (!match(V: V0, P: m_Not(V: m_Value()))) |
3601 | std::swap(a&: V0, b&: V1); |
3602 | auto Invertible = m_CombineOr(L: m_Not(V: m_Value()), R: m_AnyIntegralConstant()); |
3603 | return match(V: V0, P: m_Not(V: m_Value())) && match(V: V1, P: Invertible); |
3604 | }; |
3605 | |
3606 | // Don't fold i1 branches on PHIs which contain binary operators or |
3607 | // (possibly inverted) select form of or/ands, unless one of |
3608 | // the incoming values is an 'not' and another one is freely invertible. |
3609 | // These can often be turned into switches and other things. |
3610 | auto IsBinOpOrAnd = [](Value *V) { |
3611 | return match( |
3612 | V, P: m_CombineOr( |
3613 | L: m_BinOp(), |
3614 | R: m_CombineOr(L: m_Select(C: m_Value(), L: m_ImmConstant(), R: m_Value()), |
3615 | R: m_Select(C: m_Value(), L: m_Value(), R: m_ImmConstant())))); |
3616 | }; |
3617 | if (PN->getType()->isIntegerTy(Bitwidth: 1) && |
3618 | (IsBinOpOrAnd(PN->getIncomingValue(i: 0)) || |
3619 | IsBinOpOrAnd(PN->getIncomingValue(i: 1)) || IsBinOpOrAnd(IfCond)) && |
3620 | !CanHoistNotFromBothValues(PN->getIncomingValue(i: 0), |
3621 | PN->getIncomingValue(i: 1))) |
3622 | return Changed; |
3623 | |
3624 | // If all PHI nodes are promotable, check to make sure that all instructions |
3625 | // in the predecessor blocks can be promoted as well. If not, we won't be able |
3626 | // to get rid of the control flow, so it's not worth promoting to select |
3627 | // instructions. |
3628 | for (BasicBlock *IfBlock : IfBlocks) |
3629 | for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I) |
3630 | if (!AggressiveInsts.count(Ptr: &*I) && !I->isDebugOrPseudoInst()) { |
3631 | // This is not an aggressive instruction that we can promote. |
3632 | // Because of this, we won't be able to get rid of the control flow, so |
3633 | // the xform is not worth it. |
3634 | return Changed; |
3635 | } |
3636 | |
3637 | // If either of the blocks has it's address taken, we can't do this fold. |
3638 | if (any_of(Range&: IfBlocks, |
3639 | P: [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); })) |
3640 | return Changed; |
3641 | |
3642 | LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond |
3643 | << " T: " << IfTrue->getName() |
3644 | << " F: " << IfFalse->getName() << "\n" ); |
3645 | |
3646 | // If we can still promote the PHI nodes after this gauntlet of tests, |
3647 | // do all of the PHI's now. |
3648 | |
3649 | // Move all 'aggressive' instructions, which are defined in the |
3650 | // conditional parts of the if's up to the dominating block. |
3651 | for (BasicBlock *IfBlock : IfBlocks) |
3652 | hoistAllInstructionsInto(DomBlock, InsertPt: DomBI, BB: IfBlock); |
3653 | |
3654 | IRBuilder<NoFolder> Builder(DomBI); |
3655 | // Propagate fast-math-flags from phi nodes to replacement selects. |
3656 | IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); |
3657 | while (PHINode *PN = dyn_cast<PHINode>(Val: BB->begin())) { |
3658 | if (isa<FPMathOperator>(Val: PN)) |
3659 | Builder.setFastMathFlags(PN->getFastMathFlags()); |
3660 | |
3661 | // Change the PHI node into a select instruction. |
3662 | Value *TrueVal = PN->getIncomingValueForBlock(BB: IfTrue); |
3663 | Value *FalseVal = PN->getIncomingValueForBlock(BB: IfFalse); |
3664 | |
3665 | Value *Sel = Builder.CreateSelect(C: IfCond, True: TrueVal, False: FalseVal, Name: "" , MDFrom: DomBI); |
3666 | PN->replaceAllUsesWith(V: Sel); |
3667 | Sel->takeName(V: PN); |
3668 | PN->eraseFromParent(); |
3669 | } |
3670 | |
3671 | // At this point, all IfBlocks are empty, so our if statement |
3672 | // has been flattened. Change DomBlock to jump directly to our new block to |
3673 | // avoid other simplifycfg's kicking in on the diamond. |
3674 | Builder.CreateBr(Dest: BB); |
3675 | |
3676 | SmallVector<DominatorTree::UpdateType, 3> Updates; |
3677 | if (DTU) { |
3678 | Updates.push_back(Elt: {DominatorTree::Insert, DomBlock, BB}); |
3679 | for (auto *Successor : successors(BB: DomBlock)) |
3680 | Updates.push_back(Elt: {DominatorTree::Delete, DomBlock, Successor}); |
3681 | } |
3682 | |
3683 | DomBI->eraseFromParent(); |
3684 | if (DTU) |
3685 | DTU->applyUpdates(Updates); |
3686 | |
3687 | return true; |
3688 | } |
3689 | |
3690 | static Value *createLogicalOp(IRBuilderBase &Builder, |
3691 | Instruction::BinaryOps Opc, Value *LHS, |
3692 | Value *RHS, const Twine &Name = "" ) { |
3693 | // Try to relax logical op to binary op. |
3694 | if (impliesPoison(ValAssumedPoison: RHS, V: LHS)) |
3695 | return Builder.CreateBinOp(Opc, LHS, RHS, Name); |
3696 | if (Opc == Instruction::And) |
3697 | return Builder.CreateLogicalAnd(Cond1: LHS, Cond2: RHS, Name); |
3698 | if (Opc == Instruction::Or) |
3699 | return Builder.CreateLogicalOr(Cond1: LHS, Cond2: RHS, Name); |
3700 | llvm_unreachable("Invalid logical opcode" ); |
3701 | } |
3702 | |
3703 | /// Return true if either PBI or BI has branch weight available, and store |
3704 | /// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does |
3705 | /// not have branch weight, use 1:1 as its weight. |
3706 | static bool (BranchInst *PBI, BranchInst *BI, |
3707 | uint64_t &PredTrueWeight, |
3708 | uint64_t &PredFalseWeight, |
3709 | uint64_t &SuccTrueWeight, |
3710 | uint64_t &SuccFalseWeight) { |
3711 | bool PredHasWeights = |
3712 | extractBranchWeights(I: *PBI, TrueVal&: PredTrueWeight, FalseVal&: PredFalseWeight); |
3713 | bool SuccHasWeights = |
3714 | extractBranchWeights(I: *BI, TrueVal&: SuccTrueWeight, FalseVal&: SuccFalseWeight); |
3715 | if (PredHasWeights || SuccHasWeights) { |
3716 | if (!PredHasWeights) |
3717 | PredTrueWeight = PredFalseWeight = 1; |
3718 | if (!SuccHasWeights) |
3719 | SuccTrueWeight = SuccFalseWeight = 1; |
3720 | return true; |
3721 | } else { |
3722 | return false; |
3723 | } |
3724 | } |
3725 | |
/// Determine if the two branches share a common destination and deduce a glue
/// that joins the branches' conditions to arrive at the common destination if
/// that would be profitable.
///
/// \returns a tuple (CommonSucc, Opc, InvertPredCond) where:
///   - CommonSucc is the successor block shared by \p BI and \p PBI,
///   - Opc is the logical opcode (And / Or) that combines the two conditions,
///   - InvertPredCond indicates \p PBI's condition must be inverted first;
/// or std::nullopt when no successor is shared or the fold looks
/// unprofitable (the predecessor branch is already well predicted).
static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
                                          const TargetTransformInfo *TTI) {
  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
         "Both blocks must end with a conditional branches." );
  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
         "PredBB must be a predecessor of BB." );

  // We have the potential to fold the conditions together, but if the
  // predecessor branch is predictable, we may not want to merge them.
  // PBITrueProb stays "unknown" when PBI carries no usable profile data, in
  // which case every case below treats the fold as profitable.
  uint64_t PTWeight, PFWeight;
  BranchProbability PBITrueProb, Likely;
  if (TTI && !PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
      extractBranchWeights(I: *PBI, TrueVal&: PTWeight, FalseVal&: PFWeight) &&
      (PTWeight + PFWeight) != 0) {
    PBITrueProb =
        BranchProbability::getBranchProbability(Numerator: PTWeight, Denominator: PTWeight + PFWeight);
    Likely = TTI->getPredictableBranchThreshold();
  }

  // Four cases, one per way the two branches can share a successor. The bool
  // in each returned tuple records whether PBI's condition needs inverting so
  // that its edge into BB becomes the canonical one for the chosen opcode.
  if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, false}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, false}};
  } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
    // Speculate the 2nd condition unless the 1st is probably true.
    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
      return {{BI->getSuccessor(i: 1), Instruction::And, true}};
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
    // Speculate the 2nd condition unless the 1st is probably false.
    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
      return {{BI->getSuccessor(i: 0), Instruction::Or, true}};
  }
  return std::nullopt;
}
3768 | |
/// Fold the conditional branch \p BI into its conditional-branch predecessor
/// \p PBI. The two must already share a common destination (the caller has
/// verified this via shouldFoldCondBranchesToCommonDestination). BB's
/// instructions are cloned into PredBlock and the two conditions are glued
/// with a logical and/or. Always returns true.
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(Src: BB->getTerminator(),
                                MetadataKinds: {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  // After (possible) inversion: when PBI reaches BB on its true edge, the
  // folded branch continues to BI's true successor, otherwise to BI's false
  // successor.
  BasicBlock *UniqueSucc =
      PBI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 0) : BI->getSuccessor(i: 1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  AddPredecessorToBlock(Succ: UniqueSucc, NewPred: PredBlock, ExistPred: BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {
    SmallVector<uint64_t, 8> NewWeights;

    if (PBI->getSuccessor(i: 0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(Elt: PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //              TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      NewWeights.push_back(Elt: PredFalseWeight *
                               (SuccFalseWeight + SuccTrueWeight) +
                           PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //             FalseWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(Elt: PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                           PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      NewWeights.push_back(Elt: PredFalseWeight * SuccFalseWeight);
    }

    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights: NewWeights);

    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
    setBranchWeights(I: PBI, TrueWeight: MDWeights[0], FalseWeight: MDWeights[1]);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    PBI->setMetadata(KindID: LLVMContext::MD_prof, Node: nullptr);

  // Now, update the CFG. The index expression selects whichever of PBI's
  // successor operands currently points at BB and redirects it to UniqueSucc.
  PBI->setSuccessor(idx: PBI->getSuccessor(i: 0) != BB, NewSucc: UniqueSucc);

  if (DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(KindID: LLVMContext::MD_loop))
    PBI->setMetadata(KindID: LLVMContext::MD_loop, Node: LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // Copy any debug records attached to BB's terminator onto the new
  // terminator and remap them through VMap so they refer to the cloned
  // instructions rather than the originals.
  if (PredBlock->IsNewDbgInfoFormat) {
    PredBlock->getTerminator()->cloneDebugInfoFrom(From: BB->getTerminator());
    for (DbgVariableRecord &DVR :
         filterDbgVars(R: PredBlock->getTerminator()->getDbgRecordRange())) {
      RemapDbgVariableRecord(M, V: &DVR, VM&: VMap,
                             Flags: RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, LHS: PBI->getCondition(), RHS: BICond, Name: "or.cond" ));

  ++NumFoldBranchToCommonDest;
  return true;
}
3880 | |
3881 | /// Return if an instruction's type or any of its operands' types are a vector |
3882 | /// type. |
3883 | static bool isVectorOp(Instruction &I) { |
3884 | return I.getType()->isVectorTy() || any_of(Range: I.operands(), P: [](Use &U) { |
3885 | return U->getType()->isVectorTy(); |
3886 | }); |
3887 | } |
3888 | |
/// If this basic block is simple enough, and if a predecessor branches to us
/// and one of our successors, fold the block into the predecessor and use
/// logical operations to pick the right destination.
///
/// \p BonusInstThreshold caps the number of "bonus" instructions (BB's
/// instructions other than the branch condition and the terminator, which
/// must be cloned into every folded predecessor) we are willing to duplicate.
/// Returns true if the CFG was changed.
bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
                                  MemorySSAUpdater *MSSAU,
                                  const TargetTransformInfo *TTI,
                                  unsigned BonusInstThreshold) {
  // If this block ends with an unconditional branch,
  // let SpeculativelyExecuteBB() deal with it.
  if (!BI->isConditional())
    return false;

  BasicBlock *BB = BI->getParent();
  TargetTransformInfo::TargetCostKind CostKind =
      BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
                                    : TargetTransformInfo::TCK_SizeAndLatency;

  Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition());

  // Only fold when the condition is a single-use cmp/binop/select defined in
  // this very block; anything else is not profitable or safe to clone.
  if (!Cond ||
      (!isa<CmpInst>(Val: Cond) && !isa<BinaryOperator>(Val: Cond) &&
       !isa<SelectInst>(Val: Cond)) ||
      Cond->getParent() != BB || !Cond->hasOneUse())
    return false;

  // Finally, don't infinitely unroll conditional loops.
  if (is_contained(Range: successors(BB), Element: BB))
    return false;

  // With which predecessors will we want to deal with?
  SmallVector<BasicBlock *, 8> Preds;
  for (BasicBlock *PredBlock : predecessors(BB)) {
    BranchInst *PBI = dyn_cast<BranchInst>(Val: PredBlock->getTerminator());

    // Check that we have two conditional branches. If there is a PHI node in
    // the common successor, verify that the same value flows in from both
    // blocks.
    if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(SI1: BI, SI2: PBI))
      continue;

    // Determine if the two branches share a common destination.
    BasicBlock *CommonSucc;
    Instruction::BinaryOps Opc;
    bool InvertPredCond;
    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
      std::tie(args&: CommonSucc, args&: Opc, args&: InvertPredCond) = *Recipe;
    else
      continue;

    // Check the cost of inserting the necessary logic before performing the
    // transformation.
    if (TTI) {
      Type *Ty = BI->getCondition()->getType();
      InstructionCost Cost = TTI->getArithmeticInstrCost(Opcode: Opc, Ty, CostKind);
      // Inverting a multi-use or non-cmp condition requires materializing an
      // extra xor; account for it.
      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
                             !isa<CmpInst>(Val: PBI->getCondition())))
        Cost += TTI->getArithmeticInstrCost(Opcode: Instruction::Xor, Ty, CostKind);

      if (Cost > BranchFoldThreshold)
        continue;
    }

    // Ok, we do want to deal with this predecessor. Record it.
    Preds.emplace_back(Args&: PredBlock);
  }

  // If there aren't any predecessors into which we can fold,
  // don't bother checking the cost.
  if (Preds.empty())
    return false;

  // Only allow this transformation if computing the condition doesn't involve
  // too many instructions and these involved instructions can be executed
  // unconditionally. We denote all involved instructions except the condition
  // as "bonus instructions", and only allow this transformation when the
  // number of the bonus instructions we'll need to create when cloning into
  // each predecessor does not exceed a certain threshold.
  unsigned NumBonusInsts = 0;
  bool SawVectorOp = false;
  const unsigned PredCount = Preds.size();
  for (Instruction &I : *BB) {
    // Don't check the branch condition comparison itself.
    if (&I == Cond)
      continue;
    // Ignore dbg intrinsics, and the terminator.
    if (isa<DbgInfoIntrinsic>(Val: I) || isa<BranchInst>(Val: I))
      continue;
    // I must be safe to execute unconditionally.
    if (!isSafeToSpeculativelyExecute(I: &I))
      return false;
    SawVectorOp |= isVectorOp(I);

    // Account for the cost of duplicating this instruction into each
    // predecessor. Ignore free instructions.
    if (!TTI || TTI->getInstructionCost(U: &I, CostKind) !=
                    TargetTransformInfo::TCC_Free) {
      NumBonusInsts += PredCount;

      // Early exits once we reach the limit.
      if (NumBonusInsts >
          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
        return false;
    }

    // A bonus instruction's uses must all be "block-closed": either a PHI
    // incoming value on an edge leaving BB, or a later instruction inside BB.
    auto IsBCSSAUse = [BB, &I](Use &U) {
      auto *UI = cast<Instruction>(Val: U.getUser());
      if (auto *PN = dyn_cast<PHINode>(Val: UI))
        return PN->getIncomingBlock(U) == BB;
      return UI->getParent() == BB && I.comesBefore(Other: UI);
    };

    // Does this instruction require rewriting of uses?
    if (!all_of(Range: I.uses(), P: IsBCSSAUse))
      return false;
  }
  if (NumBonusInsts >
      BonusInstThreshold *
          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
    return false;

  // Ok, we have the budget. Perform the transformation.
  // NOTE(review): only the first recorded predecessor is folded before
  // returning; presumably the remaining ones are handled when the caller
  // re-runs simplification on this block — confirm before relying on it.
  for (BasicBlock *PredBlock : Preds) {
    auto *PBI = cast<BranchInst>(Val: PredBlock->getTerminator());
    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
  }
  return false;
}
4016 | |
4017 | // If there is only one store in BB1 and BB2, return it, otherwise return |
4018 | // nullptr. |
4019 | static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) { |
4020 | StoreInst *S = nullptr; |
4021 | for (auto *BB : {BB1, BB2}) { |
4022 | if (!BB) |
4023 | continue; |
4024 | for (auto &I : *BB) |
4025 | if (auto *SI = dyn_cast<StoreInst>(Val: &I)) { |
4026 | if (S) |
4027 | // Multiple stores seen. |
4028 | return nullptr; |
4029 | else |
4030 | S = SI; |
4031 | } |
4032 | } |
4033 | return S; |
4034 | } |
4035 | |
/// Make \p V, which is defined in \p BB, available in BB's single successor
/// by finding or creating a PHI node there. Returns the PHI, or \p V itself
/// when no PHI is required. See the comment below for how \p AlternativeV
/// constrains which existing PHI may be reused.
static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
                                              Value *AlternativeV = nullptr) {
  // PHI is going to be a PHI node that allows the value V that is defined in
  // BB to be referenced in BB's only successor.
  //
  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
  // doesn't matter to us what the other operand is (it'll never get used). We
  // could just create a new PHI with an undef incoming value, but that could
  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
  // other PHI. So here we directly look for some PHI in BB's successor with V
  // as an incoming operand. If we find one, we use it, else we create a new
  // one.
  //
  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
  // where OtherBB is the single other predecessor of BB's only successor.
  PHINode *PHI = nullptr;
  BasicBlock *Succ = BB->getSingleSuccessor();

  // First try to reuse an existing PHI that already carries the required
  // incoming value(s); this avoids creating a redundant node.
  for (auto I = Succ->begin(); isa<PHINode>(Val: I); ++I)
    if (cast<PHINode>(Val&: I)->getIncomingValueForBlock(BB) == V) {
      PHI = cast<PHINode>(Val&: I);
      if (!AlternativeV)
        break;

      assert(Succ->hasNPredecessors(2));
      auto PredI = pred_begin(BB: Succ);
      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
      if (PHI->getIncomingValueForBlock(BB: OtherPredBB) == AlternativeV)
        break;
      // This PHI matches V but not AlternativeV; keep searching.
      PHI = nullptr;
    }
  if (PHI)
    return PHI;

  // If V is not an instruction defined in BB, just return it.
  if (!AlternativeV &&
      (!isa<Instruction>(Val: V) || cast<Instruction>(Val: V)->getParent() != BB))
    return V;

  // Otherwise build a fresh PHI: V on BB's edge, AlternativeV (or poison when
  // the other value is irrelevant) on every other incoming edge.
  PHI = PHINode::Create(Ty: V->getType(), NumReservedValues: 2, NameStr: "simplifycfg.merge" );
  PHI->insertBefore(InsertPos: Succ->begin());
  PHI->addIncoming(V, BB);
  for (BasicBlock *PredBB : predecessors(BB: Succ))
    if (PredBB != BB)
      PHI->addIncoming(
          V: AlternativeV ? AlternativeV : PoisonValue::get(T: V->getType()), BB: PredBB);
  return PHI;
}
4085 | |
4086 | static bool mergeConditionalStoreToAddress( |
4087 | BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, |
4088 | BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, |
4089 | DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) { |
4090 | // For every pointer, there must be exactly two stores, one coming from |
4091 | // PTB or PFB, and the other from QTB or QFB. We don't support more than one |
4092 | // store (to any address) in PTB,PFB or QTB,QFB. |
4093 | // FIXME: We could relax this restriction with a bit more work and performance |
4094 | // testing. |
4095 | StoreInst *PStore = findUniqueStoreInBlocks(BB1: PTB, BB2: PFB); |
4096 | StoreInst *QStore = findUniqueStoreInBlocks(BB1: QTB, BB2: QFB); |
4097 | if (!PStore || !QStore) |
4098 | return false; |
4099 | |
4100 | // Now check the stores are compatible. |
4101 | if (!QStore->isUnordered() || !PStore->isUnordered() || |
4102 | PStore->getValueOperand()->getType() != |
4103 | QStore->getValueOperand()->getType()) |
4104 | return false; |
4105 | |
4106 | // Check that sinking the store won't cause program behavior changes. Sinking |
4107 | // the store out of the Q blocks won't change any behavior as we're sinking |
4108 | // from a block to its unconditional successor. But we're moving a store from |
4109 | // the P blocks down through the middle block (QBI) and past both QFB and QTB. |
4110 | // So we need to check that there are no aliasing loads or stores in |
4111 | // QBI, QTB and QFB. We also need to check there are no conflicting memory |
4112 | // operations between PStore and the end of its parent block. |
4113 | // |
4114 | // The ideal way to do this is to query AliasAnalysis, but we don't |
4115 | // preserve AA currently so that is dangerous. Be super safe and just |
4116 | // check there are no other memory operations at all. |
4117 | for (auto &I : *QFB->getSinglePredecessor()) |
4118 | if (I.mayReadOrWriteMemory()) |
4119 | return false; |
4120 | for (auto &I : *QFB) |
4121 | if (&I != QStore && I.mayReadOrWriteMemory()) |
4122 | return false; |
4123 | if (QTB) |
4124 | for (auto &I : *QTB) |
4125 | if (&I != QStore && I.mayReadOrWriteMemory()) |
4126 | return false; |
4127 | for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end(); |
4128 | I != E; ++I) |
4129 | if (&*I != PStore && I->mayReadOrWriteMemory()) |
4130 | return false; |
4131 | |
4132 | // If we're not in aggressive mode, we only optimize if we have some |
4133 | // confidence that by optimizing we'll allow P and/or Q to be if-converted. |
4134 | auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) { |
4135 | if (!BB) |
4136 | return true; |
4137 | // Heuristic: if the block can be if-converted/phi-folded and the |
4138 | // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to |
4139 | // thread this store. |
4140 | InstructionCost Cost = 0; |
4141 | InstructionCost Budget = |
4142 | PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; |
4143 | for (auto &I : BB->instructionsWithoutDebug(SkipPseudoOp: false)) { |
4144 | // Consider terminator instruction to be free. |
4145 | if (I.isTerminator()) |
4146 | continue; |
4147 | // If this is one the stores that we want to speculate out of this BB, |
4148 | // then don't count it's cost, consider it to be free. |
4149 | if (auto *S = dyn_cast<StoreInst>(Val: &I)) |
4150 | if (llvm::find(Range&: FreeStores, Val: S)) |
4151 | continue; |
4152 | // Else, we have a white-list of instructions that we are ak speculating. |
4153 | if (!isa<BinaryOperator>(Val: I) && !isa<GetElementPtrInst>(Val: I)) |
4154 | return false; // Not in white-list - not worthwhile folding. |
4155 | // And finally, if this is a non-free instruction that we are okay |
4156 | // speculating, ensure that we consider the speculation budget. |
4157 | Cost += |
4158 | TTI.getInstructionCost(U: &I, CostKind: TargetTransformInfo::TCK_SizeAndLatency); |
4159 | if (Cost > Budget) |
4160 | return false; // Eagerly refuse to fold as soon as we're out of budget. |
4161 | } |
4162 | assert(Cost <= Budget && |
4163 | "When we run out of budget we will eagerly return from within the " |
4164 | "per-instruction loop." ); |
4165 | return true; |
4166 | }; |
4167 | |
4168 | const std::array<StoreInst *, 2> FreeStores = {PStore, QStore}; |
4169 | if (!MergeCondStoresAggressively && |
4170 | (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || |
4171 | !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) |
4172 | return false; |
4173 | |
4174 | // If PostBB has more than two predecessors, we need to split it so we can |
4175 | // sink the store. |
4176 | if (std::next(x: pred_begin(BB: PostBB), n: 2) != pred_end(BB: PostBB)) { |
4177 | // We know that QFB's only successor is PostBB. And QFB has a single |
4178 | // predecessor. If QTB exists, then its only successor is also PostBB. |
4179 | // If QTB does not exist, then QFB's only predecessor has a conditional |
4180 | // branch to QFB and PostBB. |
4181 | BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); |
4182 | BasicBlock *NewBB = |
4183 | SplitBlockPredecessors(BB: PostBB, Preds: {QFB, TruePred}, Suffix: "condstore.split" , DTU); |
4184 | if (!NewBB) |
4185 | return false; |
4186 | PostBB = NewBB; |
4187 | } |
4188 | |
4189 | // OK, we're going to sink the stores to PostBB. The store has to be |
4190 | // conditional though, so first create the predicate. |
4191 | Value *PCond = cast<BranchInst>(Val: PFB->getSinglePredecessor()->getTerminator()) |
4192 | ->getCondition(); |
4193 | Value *QCond = cast<BranchInst>(Val: QFB->getSinglePredecessor()->getTerminator()) |
4194 | ->getCondition(); |
4195 | |
4196 | Value *PPHI = ensureValueAvailableInSuccessor(V: PStore->getValueOperand(), |
4197 | BB: PStore->getParent()); |
4198 | Value *QPHI = ensureValueAvailableInSuccessor(V: QStore->getValueOperand(), |
4199 | BB: QStore->getParent(), AlternativeV: PPHI); |
4200 | |
4201 | BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt(); |
4202 | IRBuilder<> QB(PostBB, PostBBFirst); |
4203 | QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc()); |
4204 | |
4205 | Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(V: PCond); |
4206 | Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(V: QCond); |
4207 | |
4208 | if (InvertPCond) |
4209 | PPred = QB.CreateNot(V: PPred); |
4210 | if (InvertQCond) |
4211 | QPred = QB.CreateNot(V: QPred); |
4212 | Value *CombinedPred = QB.CreateOr(LHS: PPred, RHS: QPred); |
4213 | |
4214 | BasicBlock::iterator InsertPt = QB.GetInsertPoint(); |
4215 | auto *T = SplitBlockAndInsertIfThen(Cond: CombinedPred, SplitBefore: InsertPt, |
4216 | /*Unreachable=*/false, |
4217 | /*BranchWeights=*/nullptr, DTU); |
4218 | |
4219 | QB.SetInsertPoint(T); |
4220 | StoreInst *SI = cast<StoreInst>(Val: QB.CreateStore(Val: QPHI, Ptr: Address)); |
4221 | SI->setAAMetadata(PStore->getAAMetadata().merge(Other: QStore->getAAMetadata())); |
4222 | // Choose the minimum alignment. If we could prove both stores execute, we |
4223 | // could use biggest one. In this case, though, we only know that one of the |
4224 | // stores executes. And we don't know it's safe to take the alignment from a |
4225 | // store that doesn't execute. |
4226 | SI->setAlignment(std::min(a: PStore->getAlign(), b: QStore->getAlign())); |
4227 | |
4228 | QStore->eraseFromParent(); |
4229 | PStore->eraseFromParent(); |
4230 | |
4231 | return true; |
4232 | } |
4233 | |
/// Try to merge conditional stores in two consecutive diamonds/triangles
/// (headed by \p PBI then \p QBI) that write to a common address; see the
/// diagram below. Returns true if any pair of stores was merged.
static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
                                   DomTreeUpdater *DTU, const DataLayout &DL,
                                   const TargetTransformInfo &TTI) {
  // The intention here is to find diamonds or triangles (see below) where each
  // conditional block contains a store to the same address. Both of these
  // stores are conditional, so they can't be unconditionally sunk. But it may
  // be profitable to speculatively sink the stores into one merged store at the
  // end, and predicate the merged store on the union of the two conditions of
  // PBI and QBI.
  //
  // This can reduce the number of stores executed if both of the conditions are
  // true, and can allow the blocks to become small enough to be if-converted.
  // This optimization will also chain, so that ladders of test-and-set
  // sequences can be if-converted away.
  //
  // We only deal with simple diamonds or triangles:
  //
  //   PBI       or   PBI       or a combination of the two
  //  /   \            | \
  // PTB  PFB          |  PFB
  //  \   /            | /
  //   QBI            QBI
  //  /   \            | \
  // QTB  QFB          |  QFB
  //  \   /            | /
  //  PostBB          PostBB
  //
  // We model triangles as a type of diamond with a nullptr "true" block.
  // Triangles are canonicalized so that the fallthrough edge is represented by
  // a true condition, as in the diagram above.
  BasicBlock *PTB = PBI->getSuccessor(i: 0);
  BasicBlock *PFB = PBI->getSuccessor(i: 1);
  BasicBlock *QTB = QBI->getSuccessor(i: 0);
  BasicBlock *QFB = QBI->getSuccessor(i: 1);
  BasicBlock *PostBB = QFB->getSingleSuccessor();

  // Make sure we have a good guess for PostBB. If QTB's only successor is
  // QFB, then QFB is a better PostBB.
  if (QTB->getSingleSuccessor() == QFB)
    PostBB = QFB;

  // If we couldn't find a good PostBB, stop.
  if (!PostBB)
    return false;

  bool InvertPCond = false, InvertQCond = false;
  // Canonicalize fallthroughs to the true branches.
  if (PFB == QBI->getParent()) {
    std::swap(a&: PFB, b&: PTB);
    InvertPCond = true;
  }
  if (QFB == PostBB) {
    std::swap(a&: QFB, b&: QTB);
    InvertQCond = true;
  }

  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
  // and QFB may not. Model fallthroughs as a nullptr block.
  if (PTB == QBI->getParent())
    PTB = nullptr;
  if (QTB == PostBB)
    QTB = nullptr;

  // Legality bailouts. We must have at least the non-fallthrough blocks and
  // the post-dominating block, and the non-fallthroughs must only have one
  // predecessor.
  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
  };
  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
    return false;
  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
    return false;
  // QBI's block must have exactly two users (presumably its two incoming
  // branch edges) — any additional reference blocks the rewrite.
  if (!QBI->getParent()->hasNUses(N: 2))
    return false;

  // OK, this is a sequence of two diamonds or triangles.
  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
  for (auto *BB : {PTB, PFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        PStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }
  for (auto *BB : {QTB, QFB}) {
    if (!BB)
      continue;
    for (auto &I : *BB)
      if (StoreInst *SI = dyn_cast<StoreInst>(Val: &I))
        QStoreAddresses.insert(Ptr: SI->getPointerOperand());
  }

  set_intersect(S1&: PStoreAddresses, S2: QStoreAddresses);
  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
  // clear what it contains.
  auto &CommonAddresses = PStoreAddresses;

  bool Changed = false;
  for (auto *Address : CommonAddresses)
    Changed |=
        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
                                       InvertPCond, InvertQCond, DTU, DL, TTI);
  return Changed;
}
4342 | |
/// If the previous block ended with a widenable branch, determine if reusing
/// the target block is profitable and legal. This will have the effect of
/// "widening" PBI, but doesn't require us to reason about hoisting safety.
static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU) {
  // TODO: This can be generalized in two important ways:
  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
  //    values from the PBI edge.
  // 2) We can sink side effecting instructions into BI's fallthrough
  //    successor provided they don't contribute to computation of
  //    BI's condition.
  BasicBlock *IfTrueBB = PBI->getSuccessor(i: 0);
  BasicBlock *IfFalseBB = PBI->getSuccessor(i: 1);
  // PBI must be a widenable branch whose true edge is the unique way into
  // BI's block; otherwise redirecting one of BI's edges to IfFalseBB would
  // not be a pure "widening" of PBI.
  if (!isWidenableBranch(U: PBI) || IfTrueBB != BI->getParent() ||
      !BI->getParent()->getSinglePredecessor())
    return false;
  if (!IfFalseBB->phis().empty())
    return false; // TODO
  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
  // may undo the transform done here.
  // TODO: There might be a more fine-grained solution to this.
  if (!llvm::succ_empty(BB: IfFalseBB))
    return false;
  // Use lambda to lazily compute expensive condition after cheap ones.
  // BI's block must not write memory or otherwise have side effects, since
  // after the rewrite those instructions may be bypassed via IfFalseBB.
  auto NoSideEffects = [](BasicBlock &BB) {
    return llvm::none_of(Range&: BB, P: [](const Instruction &I) {
      return I.mayWriteToMemory() || I.mayHaveSideEffects();
    });
  };
  // Case 1: BI's false successor ends in a deoptimize call — retarget that
  // edge at IfFalseBB (the widenable branch's failure block) instead.
  if (BI->getSuccessor(i: 1) != IfFalseBB && // no inf looping
      BI->getSuccessor(i: 1)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(i: 1);
    OldSuccessor->removePredecessor(Pred: BI->getParent());
    BI->setSuccessor(idx: 1, NewSucc: IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  // Case 2: symmetric to case 1, with the deoptimizing block on BI's true
  // successor edge.
  if (BI->getSuccessor(i: 0) != IfFalseBB && // no inf looping
      BI->getSuccessor(i: 0)->getTerminatingDeoptimizeCall() && // profitability
      NoSideEffects(*BI->getParent())) {
    auto *OldSuccessor = BI->getSuccessor(i: 0);
    OldSuccessor->removePredecessor(Pred: BI->getParent());
    BI->setSuccessor(idx: 0, NewSucc: IfFalseBB);
    if (DTU)
      DTU->applyUpdates(
          Updates: {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
    return true;
  }
  return false;
}
4398 | |
/// If we have a conditional branch as a predecessor of another block,
/// this function tries to simplify it. We know
/// that PBI and BI are both conditional branches, and BI is in one of the
/// successor blocks of PBI - PBI branches to BI.
///
/// Transformations attempted, in order:
///  1. If both branches test the same condition, rewrite BI to branch on a
///     constant (its outcome is implied by which PBI edge was taken).
///  2. Widen a preceding widenable branch (tryWidenCondBranchToCondBranch).
///  3. Merge conditional stores to a common address (mergeConditionalStores).
///  4. Fold both conditions into one logical op so PBI alone selects between
///     the shared destination and the remaining destination.
static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
                                           DomTreeUpdater *DTU,
                                           const DataLayout &DL,
                                           const TargetTransformInfo &TTI) {
  assert(PBI->isConditional() && BI->isConditional());
  BasicBlock *BB = BI->getParent();

  // If this block ends with a branch instruction, and if there is a
  // predecessor that ends on a branch of the same condition, make
  // this conditional branch redundant.
  if (PBI->getCondition() == BI->getCondition() &&
      PBI->getSuccessor(i: 0) != PBI->getSuccessor(i: 1)) {
    // Okay, the outcome of this conditional branch is statically
    // knowable. If this block had a single pred, handle specially, otherwise
    // FoldCondBranchOnValueKnownInPredecessor() will handle it.
    if (BB->getSinglePredecessor()) {
      // Turn this into a branch on constant.
      bool CondIsTrue = PBI->getSuccessor(i: 0) == BB;
      BI->setCondition(
          ConstantInt::get(Ty: Type::getInt1Ty(C&: BB->getContext()), V: CondIsTrue));
      return true; // Nuke the branch on constant.
    }
  }

  // If the previous block ended with a widenable branch, determine if reusing
  // the target block is profitable and legal. This will have the effect of
  // "widening" PBI, but doesn't require us to reason about hoisting safety.
  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
    return true;

  // If both branches are conditional and both contain stores to the same
  // address, remove the stores from the conditionals and create a conditional
  // merged store at the end.
  if (MergeCondStores && mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
    return true;

  // If this is a conditional branch in an empty block, and if any
  // predecessors are a conditional branch to one of our destinations,
  // fold the conditions into logical ops and one cond br.

  // Ignore dbg intrinsics.
  if (&*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin() != BI)
    return false;

  // Determine which successor the two branches share. PBIOp/BIOp index the
  // common destination in each branch; the transform bails if they share none.
  int PBIOp, BIOp;
  if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 0)) {
    PBIOp = 0;
    BIOp = 0;
  } else if (PBI->getSuccessor(i: 0) == BI->getSuccessor(i: 1)) {
    PBIOp = 0;
    BIOp = 1;
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 0)) {
    PBIOp = 1;
    BIOp = 0;
  } else if (PBI->getSuccessor(i: 1) == BI->getSuccessor(i: 1)) {
    PBIOp = 1;
    BIOp = 1;
  } else {
    return false;
  }

  // Check to make sure that the other destination of this branch
  // isn't BB itself. If so, this is an infinite loop that will
  // keep getting unwound.
  if (PBI->getSuccessor(i: PBIOp) == BB)
    return false;

  // If predecessor's branch probability to BB is too low don't merge branches.
  SmallVector<uint32_t, 2> PredWeights;
  if (!PBI->getMetadata(KindID: LLVMContext::MD_unpredictable) &&
      extractBranchWeights(I: *PBI, Weights&: PredWeights) &&
      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {

    BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
        Numerator: PredWeights[PBIOp],
        Denominator: static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);

    // If PBI already goes to the common destination most of the time, the
    // merged condition would rarely be needed; keep the branches separate.
    BranchProbability Likely = TTI.getPredictableBranchThreshold();
    if (CommonDestProb >= Likely)
      return false;
  }

  // Do not perform this transformation if it would require
  // insertion of a large number of select instructions. For targets
  // without predication/cmovs, this is a big pessimization.

  BasicBlock *CommonDest = PBI->getSuccessor(i: PBIOp);
  BasicBlock *RemovedDest = PBI->getSuccessor(i: PBIOp ^ 1);
  unsigned NumPhis = 0;
  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(Val: II);
       ++II, ++NumPhis) {
    if (NumPhis > 2) // Disable this xform.
      return false;
  }

  // Finally, if everything is ok, fold the branches to logical ops.
  BasicBlock *OtherDest = BI->getSuccessor(i: BIOp ^ 1);

  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
                    << "AND: " << *BI->getParent());

  SmallVector<DominatorTree::UpdateType, 5> Updates;

  // If OtherDest *is* BB, then BB is a basic block with a single conditional
  // branch in it, where one edge (OtherDest) goes back to itself but the other
  // exits. We don't *know* that the program avoids the infinite loop
  // (even though that seems likely). If we do this xform naively, we'll end up
  // recursively unpeeling the loop. Since we know that (after the xform is
  // done) that the block *is* infinite if reached, we just make it an obviously
  // infinite loop with no cond branch.
  if (OtherDest == BB) {
    // Insert it at the end of the function, because it's either code,
    // or it won't matter if it's hot. :)
    BasicBlock *InfLoopBlock =
        BasicBlock::Create(Context&: BB->getContext(), Name: "infloop" , Parent: BB->getParent());
    BranchInst::Create(IfTrue: InfLoopBlock, InsertAtEnd: InfLoopBlock);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
    OtherDest = InfLoopBlock;
  }

  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // BI may have other predecessors. Because of this, we leave
  // it alone, but modify PBI.

  // Make sure we get to CommonDest on True&True directions.
  Value *PBICond = PBI->getCondition();
  IRBuilder<NoFolder> Builder(PBI);
  if (PBIOp)
    PBICond = Builder.CreateNot(V: PBICond, Name: PBICond->getName() + ".not" );

  Value *BICond = BI->getCondition();
  if (BIOp)
    BICond = Builder.CreateNot(V: BICond, Name: BICond->getName() + ".not" );

  // Merge the conditions.
  Value *Cond =
      createLogicalOp(Builder, Opc: Instruction::Or, LHS: PBICond, RHS: BICond, Name: "brmerge" );

  // Modify PBI to branch on the new condition to the new dests.
  PBI->setCondition(Cond);
  PBI->setSuccessor(idx: 0, NewSucc: CommonDest);
  PBI->setSuccessor(idx: 1, NewSucc: OtherDest);

  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, PBI->getParent(), OtherDest});
    Updates.push_back(Elt: {DominatorTree::Delete, PBI->getParent(), RemovedDest});

    DTU->applyUpdates(Updates);
  }

  // Update branch weight for PBI.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
  bool HasWeights =
      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight);
  if (HasWeights) {
    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
    // The weight to CommonDest should be PredCommon * SuccTotal +
    //                                    PredOther * SuccCommon.
    // The weight to OtherDest should be PredOther * SuccOther.
    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
                                  PredOther * SuccCommon,
                              PredOther * SuccOther};
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights: NewWeights);

    setBranchWeights(I: PBI, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1]);
  }

  // OtherDest may have phi nodes. If so, add an entry from PBI's
  // block that are identical to the entries for BI's block.
  AddPredecessorToBlock(Succ: OtherDest, NewPred: PBI->getParent(), ExistPred: BB);

  // We know that the CommonDest already had an edge from PBI to
  // it. If it has PHIs though, the PHIs may have different
  // entries for BB and PBI's BB. If so, insert a select to make
  // them agree.
  for (PHINode &PN : CommonDest->phis()) {
    Value *BIV = PN.getIncomingValueForBlock(BB);
    unsigned PBBIdx = PN.getBasicBlockIndex(BB: PBI->getParent());
    Value *PBIV = PN.getIncomingValue(i: PBBIdx);
    if (BIV != PBIV) {
      // Insert a select in PBI to pick the right value.
      SelectInst *NV = cast<SelectInst>(
          Val: Builder.CreateSelect(C: PBICond, True: PBIV, False: BIV, Name: PBIV->getName() + ".mux" ));
      PN.setIncomingValue(i: PBBIdx, V: NV);
      // Although the select has the same condition as PBI, the original branch
      // weights for PBI do not apply to the new select because the select's
      // 'logical' edges are incoming edges of the phi that is eliminated, not
      // the outgoing edges of PBI.
      if (HasWeights) {
        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
        // The weight to PredCommonDest should be PredCommon * SuccTotal.
        // The weight to PredOtherDest should be PredOther * SuccCommon.
        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
                                  PredOther * SuccCommon};

        FitWeights(Weights: NewWeights);

        setBranchWeights(I: NV, TrueWeight: NewWeights[0], FalseWeight: NewWeights[1]);
      }
    }
  }

  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());

  // This basic block is probably dead. We know it has at least
  // one fewer predecessor.
  return true;
}
4623 | |
// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
// true or to FalseBB if Cond is false.
// Takes care of updating the successors and removing the old terminator.
// Also makes sure not to introduce new successors by assuming that edges to
// non-successor TrueBBs and FalseBBs aren't reachable.
//
// The new terminator is one of four shapes, depending on which of TrueBB /
// FalseBB were already successors of the old terminator:
//  - both (distinct):   conditional branch on Cond;
//  - both (identical):  unconditional branch;
//  - exactly one:       unconditional branch to the one that was found;
//  - neither:           unreachable.
bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
                                                Value *Cond, BasicBlock *TrueBB,
                                                BasicBlock *FalseBB,
                                                uint32_t TrueWeight,
                                                uint32_t FalseWeight) {
  auto *BB = OldTerm->getParent();
  // Remove any superfluous successor edges from the CFG.
  // First, figure out which successors to preserve.
  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
  // successor.
  BasicBlock *KeepEdge1 = TrueBB;
  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;

  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;

  // Then remove the rest. After this loop a null KeepEdge means that block
  // *was* found among the old terminator's successors.
  for (BasicBlock *Succ : successors(I: OldTerm)) {
    // Make sure only to keep exactly one copy of each edge.
    if (Succ == KeepEdge1)
      KeepEdge1 = nullptr;
    else if (Succ == KeepEdge2)
      KeepEdge2 = nullptr;
    else {
      Succ->removePredecessor(Pred: BB,
                              /*KeepOneInputPHIs=*/true);

      if (Succ != TrueBB && Succ != FalseBB)
        RemovedSuccessors.insert(X: Succ);
    }
  }

  IRBuilder<> Builder(OldTerm);
  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());

  // Insert an appropriate new terminator.
  if (!KeepEdge1 && !KeepEdge2) {
    if (TrueBB == FalseBB) {
      // We were only looking for one successor, and it was present.
      // Create an unconditional branch to it.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // We found both of the successors we were looking for.
      // Create a conditional branch sharing the condition of the select.
      BranchInst *NewBI = Builder.CreateCondBr(Cond, True: TrueBB, False: FalseBB);
      if (TrueWeight != FalseWeight)
        setBranchWeights(I: NewBI, TrueWeight, FalseWeight);
    }
  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
    // Neither of the selected blocks were successors, so this
    // terminator must be unreachable.
    new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
  } else {
    // One of the selected values was a successor, but the other wasn't.
    // Insert an unconditional branch to the one that was found;
    // the edge to the one that wasn't must be unreachable.
    if (!KeepEdge1) {
      // Only TrueBB was found.
      Builder.CreateBr(Dest: TrueBB);
    } else {
      // Only FalseBB was found.
      Builder.CreateBr(Dest: FalseBB);
    }
  }

  EraseTerminatorAndDCECond(TI: OldTerm);

  // Notify the dominator tree about every edge we dropped above.
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(N: RemovedSuccessors.size());
    for (auto *RemovedSuccessor : RemovedSuccessors)
      Updates.push_back(Elt: {DominatorTree::Delete, BB, RemovedSuccessor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
4705 | |
4706 | // Replaces |
4707 | // (switch (select cond, X, Y)) on constant X, Y |
4708 | // with a branch - conditional if X and Y lead to distinct BBs, |
4709 | // unconditional otherwise. |
4710 | bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI, |
4711 | SelectInst *Select) { |
4712 | // Check for constant integer values in the select. |
4713 | ConstantInt *TrueVal = dyn_cast<ConstantInt>(Val: Select->getTrueValue()); |
4714 | ConstantInt *FalseVal = dyn_cast<ConstantInt>(Val: Select->getFalseValue()); |
4715 | if (!TrueVal || !FalseVal) |
4716 | return false; |
4717 | |
4718 | // Find the relevant condition and destinations. |
4719 | Value *Condition = Select->getCondition(); |
4720 | BasicBlock *TrueBB = SI->findCaseValue(C: TrueVal)->getCaseSuccessor(); |
4721 | BasicBlock *FalseBB = SI->findCaseValue(C: FalseVal)->getCaseSuccessor(); |
4722 | |
4723 | // Get weight for TrueBB and FalseBB. |
4724 | uint32_t TrueWeight = 0, FalseWeight = 0; |
4725 | SmallVector<uint64_t, 8> Weights; |
4726 | bool HasWeights = hasBranchWeightMD(I: *SI); |
4727 | if (HasWeights) { |
4728 | GetBranchWeights(TI: SI, Weights); |
4729 | if (Weights.size() == 1 + SI->getNumCases()) { |
4730 | TrueWeight = |
4731 | (uint32_t)Weights[SI->findCaseValue(C: TrueVal)->getSuccessorIndex()]; |
4732 | FalseWeight = |
4733 | (uint32_t)Weights[SI->findCaseValue(C: FalseVal)->getSuccessorIndex()]; |
4734 | } |
4735 | } |
4736 | |
4737 | // Perform the actual simplification. |
4738 | return SimplifyTerminatorOnSelect(OldTerm: SI, Cond: Condition, TrueBB, FalseBB, TrueWeight, |
4739 | FalseWeight); |
4740 | } |
4741 | |
4742 | // Replaces |
4743 | // (indirectbr (select cond, blockaddress(@fn, BlockA), |
4744 | // blockaddress(@fn, BlockB))) |
4745 | // with |
4746 | // (br cond, BlockA, BlockB). |
4747 | bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, |
4748 | SelectInst *SI) { |
4749 | // Check that both operands of the select are block addresses. |
4750 | BlockAddress *TBA = dyn_cast<BlockAddress>(Val: SI->getTrueValue()); |
4751 | BlockAddress *FBA = dyn_cast<BlockAddress>(Val: SI->getFalseValue()); |
4752 | if (!TBA || !FBA) |
4753 | return false; |
4754 | |
4755 | // Extract the actual blocks. |
4756 | BasicBlock *TrueBB = TBA->getBasicBlock(); |
4757 | BasicBlock *FalseBB = FBA->getBasicBlock(); |
4758 | |
4759 | // Perform the actual simplification. |
4760 | return SimplifyTerminatorOnSelect(OldTerm: IBI, Cond: SI->getCondition(), TrueBB, FalseBB, TrueWeight: 0, |
4761 | FalseWeight: 0); |
4762 | } |
4763 | |
/// This is called when we find an icmp instruction
/// (a seteq/setne with a constant) as the only instruction in a
/// block that ends with an uncond branch. We are looking for a very specific
/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
/// this case, we merge the first two "or's of icmp" into a switch, but then the
/// default value goes to an uncond block with a seteq in it, we get something
/// like:
///
///   switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
/// DEFAULT:
///   %tmp = icmp eq i8 %A, 92
///   br label %end
/// end:
///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
///
/// We prefer to split the edge to 'end' so that there is a true/false entry to
/// the PHI, merging the third icmp into the switch.
bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
    ICmpInst *ICI, IRBuilder<> &Builder) {
  BasicBlock *BB = ICI->getParent();

  // If the block has any PHIs in it or the icmp has multiple uses, it is too
  // complex.
  if (isa<PHINode>(Val: BB->begin()) || !ICI->hasOneUse())
    return false;

  Value *V = ICI->getOperand(i_nocapture: 0);
  ConstantInt *Cst = cast<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1));

  // The pattern we're looking for is where our only predecessor is a switch on
  // 'V' and this block is the default case for the switch. In this case we can
  // fold the compared value into the switch to simplify things.
  BasicBlock *Pred = BB->getSinglePredecessor();
  if (!Pred || !isa<SwitchInst>(Val: Pred->getTerminator()))
    return false;

  SwitchInst *SI = cast<SwitchInst>(Val: Pred->getTerminator());
  if (SI->getCondition() != V)
    return false;

  // Case A: If BB is reachable on a non-default case, then we simply know the
  // value of V in this block. Substitute it and constant fold the icmp
  // instruction away.
  if (SI->getDefaultDest() != BB) {
    ConstantInt *VVal = SI->findCaseDest(BB);
    assert(VVal && "Should have a unique destination value" );
    ICI->setOperand(i_nocapture: 0, Val_nocapture: VVal);

    if (Value *V = simplifyInstruction(I: ICI, Q: {DL, ICI})) {
      ICI->replaceAllUsesWith(V);
      ICI->eraseFromParent();
    }
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Case B: Ok, the block is reachable from the default dest. If the constant
  // we're comparing exists in one of the other edges, then we can constant
  // fold ICI and zap it: V can never equal Cst on the default path.
  if (SI->findCaseValue(C: Cst) != SI->case_default()) {
    Value *V;
    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
      V = ConstantInt::getFalse(Context&: BB->getContext());
    else
      V = ConstantInt::getTrue(Context&: BB->getContext());

    ICI->replaceAllUsesWith(V);
    ICI->eraseFromParent();
    // BB is now empty, so it is likely to simplify away.
    return requestResimplify();
  }

  // Case C: fold the icmp into the switch as a new case. The use of the icmp
  // has to be in the 'end' block, by the only PHI node in the block.
  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(Idx: 0);
  PHINode *PHIUse = dyn_cast<PHINode>(Val: ICI->user_back());
  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
      isa<PHINode>(Val: ++BasicBlock::iterator(PHIUse)))
    return false;

  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
  // true in the PHI.
  Constant *DefaultCst = ConstantInt::getTrue(Context&: BB->getContext());
  Constant *NewCst = ConstantInt::getFalse(Context&: BB->getContext());

  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
    std::swap(a&: DefaultCst, b&: NewCst);

  // Replace ICI (which is used by the PHI for the default value) with true or
  // false depending on if it is EQ or NE.
  ICI->replaceAllUsesWith(V: DefaultCst);
  ICI->eraseFromParent();

  SmallVector<DominatorTree::UpdateType, 2> Updates;

  // Okay, the switch goes to this block on a default value. Add an edge from
  // the switch to the merge point on the compared value.
  BasicBlock *NewBB =
      BasicBlock::Create(Context&: BB->getContext(), Name: "switch.edge" , Parent: BB->getParent(), InsertBefore: BB);
  {
    // Split the default edge's profile weight between the remaining default
    // path and the new case (halving the default weight for the new case).
    SwitchInstProfUpdateWrapper SIW(*SI);
    auto W0 = SIW.getSuccessorWeight(idx: 0);
    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
    if (W0) {
      NewW = ((uint64_t(*W0) + 1) >> 1);
      SIW.setSuccessorWeight(idx: 0, W: *NewW);
    }
    SIW.addCase(OnVal: Cst, Dest: NewBB, W: NewW);
    if (DTU)
      Updates.push_back(Elt: {DominatorTree::Insert, Pred, NewBB});
  }

  // NewBB branches to the phi block, add the uncond branch and the phi entry.
  Builder.SetInsertPoint(NewBB);
  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
  Builder.CreateBr(Dest: SuccBlock);
  PHIUse->addIncoming(V: NewCst, BB: NewBB);
  if (DTU) {
    Updates.push_back(Elt: {DominatorTree::Insert, NewBB, SuccBlock});
    DTU->applyUpdates(Updates);
  }
  return true;
}
4887 | |
4888 | /// The specified branch is a conditional branch. |
4889 | /// Check to see if it is branching on an or/and chain of icmp instructions, and |
4890 | /// fold it into a switch instruction if so. |
4891 | bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI, |
4892 | IRBuilder<> &Builder, |
4893 | const DataLayout &DL) { |
4894 | Instruction *Cond = dyn_cast<Instruction>(Val: BI->getCondition()); |
4895 | if (!Cond) |
4896 | return false; |
4897 | |
4898 | // Change br (X == 0 | X == 1), T, F into a switch instruction. |
4899 | // If this is a bunch of seteq's or'd together, or if it's a bunch of |
4900 | // 'setne's and'ed together, collect them. |
4901 | |
4902 | // Try to gather values from a chain of and/or to be turned into a switch |
4903 | ConstantComparesGatherer ConstantCompare(Cond, DL); |
4904 | // Unpack the result |
4905 | SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals; |
4906 | Value *CompVal = ConstantCompare.CompValue; |
4907 | unsigned UsedICmps = ConstantCompare.UsedICmps; |
4908 | Value * = ConstantCompare.Extra; |
4909 | |
4910 | // If we didn't have a multiply compared value, fail. |
4911 | if (!CompVal) |
4912 | return false; |
4913 | |
4914 | // Avoid turning single icmps into a switch. |
4915 | if (UsedICmps <= 1) |
4916 | return false; |
4917 | |
4918 | bool TrueWhenEqual = match(V: Cond, P: m_LogicalOr(L: m_Value(), R: m_Value())); |
4919 | |
4920 | // There might be duplicate constants in the list, which the switch |
4921 | // instruction can't handle, remove them now. |
4922 | array_pod_sort(Start: Values.begin(), End: Values.end(), Compare: ConstantIntSortPredicate); |
4923 | Values.erase(CS: std::unique(first: Values.begin(), last: Values.end()), CE: Values.end()); |
4924 | |
4925 | // If Extra was used, we require at least two switch values to do the |
4926 | // transformation. A switch with one value is just a conditional branch. |
4927 | if (ExtraCase && Values.size() < 2) |
4928 | return false; |
4929 | |
4930 | // TODO: Preserve branch weight metadata, similarly to how |
4931 | // FoldValueComparisonIntoPredecessors preserves it. |
4932 | |
4933 | // Figure out which block is which destination. |
4934 | BasicBlock *DefaultBB = BI->getSuccessor(i: 1); |
4935 | BasicBlock *EdgeBB = BI->getSuccessor(i: 0); |
4936 | if (!TrueWhenEqual) |
4937 | std::swap(a&: DefaultBB, b&: EdgeBB); |
4938 | |
4939 | BasicBlock *BB = BI->getParent(); |
4940 | |
4941 | LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() |
4942 | << " cases into SWITCH. BB is:\n" |
4943 | << *BB); |
4944 | |
4945 | SmallVector<DominatorTree::UpdateType, 2> Updates; |
4946 | |
4947 | // If there are any extra values that couldn't be folded into the switch |
4948 | // then we evaluate them with an explicit branch first. Split the block |
4949 | // right before the condbr to handle it. |
4950 | if (ExtraCase) { |
4951 | BasicBlock *NewBB = SplitBlock(Old: BB, SplitPt: BI, DTU, /*LI=*/nullptr, |
4952 | /*MSSAU=*/nullptr, BBName: "switch.early.test" ); |
4953 | |
4954 | // Remove the uncond branch added to the old block. |
4955 | Instruction *OldTI = BB->getTerminator(); |
4956 | Builder.SetInsertPoint(OldTI); |
4957 | |
4958 | // There can be an unintended UB if extra values are Poison. Before the |
4959 | // transformation, extra values may not be evaluated according to the |
4960 | // condition, and it will not raise UB. But after transformation, we are |
4961 | // evaluating extra values before checking the condition, and it will raise |
4962 | // UB. It can be solved by adding freeze instruction to extra values. |
4963 | AssumptionCache *AC = Options.AC; |
4964 | |
4965 | if (!isGuaranteedNotToBeUndefOrPoison(V: ExtraCase, AC, CtxI: BI, DT: nullptr)) |
4966 | ExtraCase = Builder.CreateFreeze(V: ExtraCase); |
4967 | |
4968 | if (TrueWhenEqual) |
4969 | Builder.CreateCondBr(Cond: ExtraCase, True: EdgeBB, False: NewBB); |
4970 | else |
4971 | Builder.CreateCondBr(Cond: ExtraCase, True: NewBB, False: EdgeBB); |
4972 | |
4973 | OldTI->eraseFromParent(); |
4974 | |
4975 | if (DTU) |
4976 | Updates.push_back(Elt: {DominatorTree::Insert, BB, EdgeBB}); |
4977 | |
4978 | // If there are PHI nodes in EdgeBB, then we need to add a new entry to them |
4979 | // for the edge we just added. |
4980 | AddPredecessorToBlock(Succ: EdgeBB, NewPred: BB, ExistPred: NewBB); |
4981 | |
4982 | LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase |
4983 | << "\nEXTRABB = " << *BB); |
4984 | BB = NewBB; |
4985 | } |
4986 | |
4987 | Builder.SetInsertPoint(BI); |
4988 | // Convert pointer to int before we switch. |
4989 | if (CompVal->getType()->isPointerTy()) { |
4990 | CompVal = Builder.CreatePtrToInt( |
4991 | V: CompVal, DestTy: DL.getIntPtrType(CompVal->getType()), Name: "magicptr" ); |
4992 | } |
4993 | |
4994 | // Create the new switch instruction now. |
4995 | SwitchInst *New = Builder.CreateSwitch(V: CompVal, Dest: DefaultBB, NumCases: Values.size()); |
4996 | |
4997 | // Add all of the 'cases' to the switch instruction. |
4998 | for (unsigned i = 0, e = Values.size(); i != e; ++i) |
4999 | New->addCase(OnVal: Values[i], Dest: EdgeBB); |
5000 | |
5001 | // We added edges from PI to the EdgeBB. As such, if there were any |
5002 | // PHI nodes in EdgeBB, they need entries to be added corresponding to |
5003 | // the number of edges added. |
5004 | for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(Val: BBI); ++BBI) { |
5005 | PHINode *PN = cast<PHINode>(Val&: BBI); |
5006 | Value *InVal = PN->getIncomingValueForBlock(BB); |
5007 | for (unsigned i = 0, e = Values.size() - 1; i != e; ++i) |
5008 | PN->addIncoming(V: InVal, BB); |
5009 | } |
5010 | |
5011 | // Erase the old branch instruction. |
5012 | EraseTerminatorAndDCECond(TI: BI); |
5013 | if (DTU) |
5014 | DTU->applyUpdates(Updates); |
5015 | |
5016 | LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n'); |
5017 | return true; |
5018 | } |
5019 | |
5020 | bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) { |
5021 | if (isa<PHINode>(Val: RI->getValue())) |
5022 | return simplifyCommonResume(RI); |
5023 | else if (isa<LandingPadInst>(Val: RI->getParent()->getFirstNonPHI()) && |
5024 | RI->getValue() == RI->getParent()->getFirstNonPHI()) |
5025 | // The resume must unwind the exception that caused control to branch here. |
5026 | return simplifySingleResume(RI); |
5027 | |
5028 | return false; |
5029 | } |
5030 | |
5031 | // Check if cleanup block is empty |
5032 | static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) { |
5033 | for (Instruction &I : R) { |
5034 | auto *II = dyn_cast<IntrinsicInst>(Val: &I); |
5035 | if (!II) |
5036 | return false; |
5037 | |
5038 | Intrinsic::ID IntrinsicID = II->getIntrinsicID(); |
5039 | switch (IntrinsicID) { |
5040 | case Intrinsic::dbg_declare: |
5041 | case Intrinsic::dbg_value: |
5042 | case Intrinsic::dbg_label: |
5043 | case Intrinsic::lifetime_end: |
5044 | break; |
5045 | default: |
5046 | return false; |
5047 | } |
5048 | } |
5049 | return true; |
5050 | } |
5051 | |
5052 | // Simplify resume that is shared by several landing pads (phi of landing pad). |
5053 | bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) { |
5054 | BasicBlock *BB = RI->getParent(); |
5055 | |
5056 | // Check that there are no other instructions except for debug and lifetime |
5057 | // intrinsics between the phi's and resume instruction. |
5058 | if (!isCleanupBlockEmpty( |
5059 | R: make_range(x: RI->getParent()->getFirstNonPHI(), y: BB->getTerminator()))) |
5060 | return false; |
5061 | |
5062 | SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks; |
5063 | auto *PhiLPInst = cast<PHINode>(Val: RI->getValue()); |
5064 | |
5065 | // Check incoming blocks to see if any of them are trivial. |
5066 | for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End; |
5067 | Idx++) { |
5068 | auto *IncomingBB = PhiLPInst->getIncomingBlock(i: Idx); |
5069 | auto *IncomingValue = PhiLPInst->getIncomingValue(i: Idx); |
5070 | |
5071 | // If the block has other successors, we can not delete it because |
5072 | // it has other dependents. |
5073 | if (IncomingBB->getUniqueSuccessor() != BB) |
5074 | continue; |
5075 | |
5076 | auto *LandingPad = dyn_cast<LandingPadInst>(Val: IncomingBB->getFirstNonPHI()); |
5077 | // Not the landing pad that caused the control to branch here. |
5078 | if (IncomingValue != LandingPad) |
5079 | continue; |
5080 | |
5081 | if (isCleanupBlockEmpty( |
5082 | R: make_range(x: LandingPad->getNextNode(), y: IncomingBB->getTerminator()))) |
5083 | TrivialUnwindBlocks.insert(X: IncomingBB); |
5084 | } |
5085 | |
5086 | // If no trivial unwind blocks, don't do any simplifications. |
5087 | if (TrivialUnwindBlocks.empty()) |
5088 | return false; |
5089 | |
5090 | // Turn all invokes that unwind here into calls. |
5091 | for (auto *TrivialBB : TrivialUnwindBlocks) { |
5092 | // Blocks that will be simplified should be removed from the phi node. |
5093 | // Note there could be multiple edges to the resume block, and we need |
5094 | // to remove them all. |
5095 | while (PhiLPInst->getBasicBlockIndex(BB: TrivialBB) != -1) |
5096 | BB->removePredecessor(Pred: TrivialBB, KeepOneInputPHIs: true); |
5097 | |
5098 | for (BasicBlock *Pred : |
5099 | llvm::make_early_inc_range(Range: predecessors(BB: TrivialBB))) { |
5100 | removeUnwindEdge(BB: Pred, DTU); |
5101 | ++NumInvokes; |
5102 | } |
5103 | |
5104 | // In each SimplifyCFG run, only the current processed block can be erased. |
5105 | // Otherwise, it will break the iteration of SimplifyCFG pass. So instead |
5106 | // of erasing TrivialBB, we only remove the branch to the common resume |
5107 | // block so that we can later erase the resume block since it has no |
5108 | // predecessors. |
5109 | TrivialBB->getTerminator()->eraseFromParent(); |
5110 | new UnreachableInst(RI->getContext(), TrivialBB); |
5111 | if (DTU) |
5112 | DTU->applyUpdates(Updates: {{DominatorTree::Delete, TrivialBB, BB}}); |
5113 | } |
5114 | |
5115 | // Delete the resume block if all its predecessors have been removed. |
5116 | if (pred_empty(BB)) |
5117 | DeleteDeadBlock(BB, DTU); |
5118 | |
5119 | return !TrivialUnwindBlocks.empty(); |
5120 | } |
5121 | |
5122 | // Simplify resume that is only used by a single (non-phi) landing pad. |
5123 | bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) { |
5124 | BasicBlock *BB = RI->getParent(); |
5125 | auto *LPInst = cast<LandingPadInst>(Val: BB->getFirstNonPHI()); |
5126 | assert(RI->getValue() == LPInst && |
5127 | "Resume must unwind the exception that caused control to here" ); |
5128 | |
5129 | // Check that there are no other instructions except for debug intrinsics. |
5130 | if (!isCleanupBlockEmpty( |
5131 | R: make_range<Instruction *>(x: LPInst->getNextNode(), y: RI))) |
5132 | return false; |
5133 | |
5134 | // Turn all invokes that unwind here into calls and delete the basic block. |
5135 | for (BasicBlock *Pred : llvm::make_early_inc_range(Range: predecessors(BB))) { |
5136 | removeUnwindEdge(BB: Pred, DTU); |
5137 | ++NumInvokes; |
5138 | } |
5139 | |
5140 | // The landingpad is now unreachable. Zap it. |
5141 | DeleteDeadBlock(BB, DTU); |
5142 | return true; |
5143 | } |
5144 | |
/// Remove a trivial cleanup pad (one that executes nothing but ignorable
/// intrinsics) whose cleanupret is \p RI. Predecessors are rewired either to
/// the pad's unwind destination, or — for pads that unwind to the caller —
/// have their unwind edges removed (invokes become calls). Returns true if
/// the pad was eliminated.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated. If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction. If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses. This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          R: make_range<Instruction *>(x: CPInst->getNextNode(), y: RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow. Before we do, sink any
  // PHINodes into the unwind destination. Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing. If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty). Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(i: Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(Val: SrcVal);

      // If the incoming value is itself a PHI in BB, translate it per
      // predecessor; otherwise every new edge carries the same value.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(BB: Pred) : SrcVal;
        DestPN.addIncoming(V: Incoming, BB: Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    for (PHINode &PN : make_early_inc_range(Range: BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it. It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB. In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(BB: UnwindDest))
        if (pred != BB)
          PN.addIncoming(V: &PN, BB: pred);
      PN.moveBefore(MovePos: InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(V: PoisonValue::get(T: PN.getType()), BB);
    }
  }

  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(Range: predecessors(BB))) {
    if (UnwindDest == nullptr) {
      // Unwinds to caller: flush pending DT updates first, then drop the
      // predecessor's unwind edge (removeUnwindEdge mutates the CFG).
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(BB: PredBB, DTU);
      ++NumInvokes;
    } else {
      // BB has a real unwind destination: retarget the predecessor's edge
      // from BB straight to UnwindDest.
      BB->removePredecessor(Pred: PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(From: BB, To: UnwindDest);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back(x: {DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // All predecessors have been rewired; BB itself is now dead.
  DeleteDeadBlock(BB, DTU);

  return true;
}
5261 | |
5262 | // Try to merge two cleanuppads together. |
5263 | static bool mergeCleanupPad(CleanupReturnInst *RI) { |
5264 | // Skip any cleanuprets which unwind to caller, there is nothing to merge |
5265 | // with. |
5266 | BasicBlock *UnwindDest = RI->getUnwindDest(); |
5267 | if (!UnwindDest) |
5268 | return false; |
5269 | |
5270 | // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't |
5271 | // be safe to merge without code duplication. |
5272 | if (UnwindDest->getSinglePredecessor() != RI->getParent()) |
5273 | return false; |
5274 | |
5275 | // Verify that our cleanuppad's unwind destination is another cleanuppad. |
5276 | auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(Val: &UnwindDest->front()); |
5277 | if (!SuccessorCleanupPad) |
5278 | return false; |
5279 | |
5280 | CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad(); |
5281 | // Replace any uses of the successor cleanupad with the predecessor pad |
5282 | // The only cleanuppad uses should be this cleanupret, it's cleanupret and |
5283 | // funclet bundle operands. |
5284 | SuccessorCleanupPad->replaceAllUsesWith(V: PredecessorCleanupPad); |
5285 | // Remove the old cleanuppad. |
5286 | SuccessorCleanupPad->eraseFromParent(); |
5287 | // Now, we simply replace the cleanupret with a branch to the unwind |
5288 | // destination. |
5289 | BranchInst::Create(IfTrue: UnwindDest, InsertAtEnd: RI->getParent()); |
5290 | RI->eraseFromParent(); |
5291 | |
5292 | return true; |
5293 | } |
5294 | |
5295 | bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) { |
5296 | // It is possible to transiantly have an undef cleanuppad operand because we |
5297 | // have deleted some, but not all, dead blocks. |
5298 | // Eventually, this block will be deleted. |
5299 | if (isa<UndefValue>(Val: RI->getOperand(i_nocapture: 0))) |
5300 | return false; |
5301 | |
5302 | if (mergeCleanupPad(RI)) |
5303 | return true; |
5304 | |
5305 | if (removeEmptyCleanup(RI, DTU)) |
5306 | return true; |
5307 | |
5308 | return false; |
5309 | } |
5310 | |
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
//
// Delete instructions guaranteed to transfer execution into an 'unreachable'
// terminator, and if the unreachable then starts its block, rewrite every
// predecessor's terminator so it no longer targets this block. Returns true
// if anything changed.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  BB->flushTerminatorDbgRecords();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgRecords();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(I: &*BBI))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent debug info, so
    // delete DbgRecords.
    BBI->dropDbgRecords();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(V: PoisonValue::get(T: BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the unique predecessors up front: the loop below rewrites
  // terminators and edges, which would invalidate a live predecessor walk.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(Val: TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(Range: BI->successors(),
                 P: [BB](auto *Successor) { return Successor == BB; })) {
        new UnreachableInst(TI->getContext(), TI->getIterator());
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch." );
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here." );
        // One arm leads into the unreachable block; record the implied fact
        // about the condition as an assumption and branch unconditionally to
        // the live arm.
        CallInst *Assumption;
        if (BI->getSuccessor(i: 0) == BB) {
          Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
          Builder.CreateBr(Dest: BI->getSuccessor(i: 1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG" );
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(Dest: BI->getSuccessor(i: 0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));

        EraseTerminatorAndDCECond(TI: BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(Val: TI)) {
      // Drop every case that targets BB; the wrapper keeps branch-weight
      // profile metadata in sync with the removed cases.
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(Pred: SU->getParent());
        i = SU.removeCase(I: i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(Val: TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush pending DT updates before removeUnwindEdge mutates the CFG.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        auto *CI = cast<CallInst>(Val: removeUnwindEdge(BB: TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(Val: TI)) {
      if (CSI->getUnwindDest() == BB) {
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(BB: TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB is one (or more) of the catchswitch's handlers; remove those
      // entries from the handler list.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(HI: I);
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(BB: Predecessor)) {
              Updates.push_back(x: {DominatorTree::Insert,
                                  PredecessorOfPredecessor,
                                  CSI->getUnwindDest()});
              Updates.push_back(x: {DominatorTree::Delete,
                                  PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(V: CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(BB: Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(BB: EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI->getIterator());
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(Val: TI)) {
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB." );
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, Predecessor, BB});
      new UnreachableInst(TI->getContext(), TI->getIterator());
      TI->eraseFromParent();
      Changed = true;
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5495 | |
5496 | static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) { |
5497 | assert(Cases.size() >= 1); |
5498 | |
5499 | array_pod_sort(Start: Cases.begin(), End: Cases.end(), Compare: ConstantIntSortPredicate); |
5500 | for (size_t I = 1, E = Cases.size(); I != E; ++I) { |
5501 | if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1) |
5502 | return false; |
5503 | } |
5504 | return true; |
5505 | } |
5506 | |
5507 | static void createUnreachableSwitchDefault(SwitchInst *Switch, |
5508 | DomTreeUpdater *DTU) { |
5509 | LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n" ); |
5510 | auto *BB = Switch->getParent(); |
5511 | auto *OrigDefaultBlock = Switch->getDefaultDest(); |
5512 | OrigDefaultBlock->removePredecessor(Pred: BB); |
5513 | BasicBlock *NewDefaultBlock = BasicBlock::Create( |
5514 | Context&: BB->getContext(), Name: BB->getName() + ".unreachabledefault" , Parent: BB->getParent(), |
5515 | InsertBefore: OrigDefaultBlock); |
5516 | new UnreachableInst(Switch->getContext(), NewDefaultBlock); |
5517 | Switch->setDefaultDest(&*NewDefaultBlock); |
5518 | if (DTU) { |
5519 | SmallVector<DominatorTree::UpdateType, 2> Updates; |
5520 | Updates.push_back(Elt: {DominatorTree::Insert, BB, &*NewDefaultBlock}); |
5521 | if (!is_contained(Range: successors(BB), Element: OrigDefaultBlock)) |
5522 | Updates.push_back(Elt: {DominatorTree::Delete, BB, &*OrigDefaultBlock}); |
5523 | DTU->applyUpdates(Updates); |
5524 | } |
5525 | } |
5526 | |
/// Turn a switch into an integer range comparison and branch.
/// Switches with more than 2 destinations are ignored.
/// Switches with 1 destination are also ignored.
bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
                                             IRBuilder<> &Builder) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?" );

  // The default destination only counts as a real destination when it is
  // reachable, i.e. does not begin with 'unreachable'.
  bool HasDefault =
      !isa<UnreachableInst>(Val: SI->getDefaultDest()->getFirstNonPHIOrDbg());

  auto *BB = SI->getParent();

  // Partition the cases into two sets with different destinations.
  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
  BasicBlock *DestB = nullptr;
  SmallVector<ConstantInt *, 16> CasesA;
  SmallVector<ConstantInt *, 16> CasesB;

  for (auto Case : SI->cases()) {
    BasicBlock *Dest = Case.getCaseSuccessor();
    if (!DestA)
      DestA = Dest;
    if (Dest == DestA) {
      CasesA.push_back(Elt: Case.getCaseValue());
      continue;
    }
    if (!DestB)
      DestB = Dest;
    if (Dest == DestB) {
      CasesB.push_back(Elt: Case.getCaseValue());
      continue;
    }
    return false; // More than two destinations.
  }
  if (!DestB)
    return false; // All destinations are the same and the default is unreachable

  assert(DestA && DestB &&
         "Single-destination switch should have been folded." );
  assert(DestA != DestB);
  assert(DestB != SI->getDefaultDest());
  assert(!CasesB.empty() && "There must be non-default cases." );
  assert(!CasesA.empty() || HasDefault);

  // Figure out if one of the sets of cases form a contiguous range.
  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
  BasicBlock *ContiguousDest = nullptr;
  BasicBlock *OtherDest = nullptr;
  if (!CasesA.empty() && CasesAreContiguous(Cases&: CasesA)) {
    ContiguousCases = &CasesA;
    ContiguousDest = DestA;
    OtherDest = DestB;
  } else if (CasesAreContiguous(Cases&: CasesB)) {
    ContiguousCases = &CasesB;
    ContiguousDest = DestB;
    OtherDest = DestA;
  } else
    return false;

  // Start building the compare and branch.

  // Per the ordering CasesAreContiguous establishes (each element is one
  // less than its predecessor in the vector), back() is the low end of the
  // range; adding its negation rebases the condition to [0, NumCases).
  Constant *Offset = ConstantExpr::getNeg(C: ContiguousCases->back());
  Constant *NumCases =
      ConstantInt::get(Ty: Offset->getType(), V: ContiguousCases->size());

  Value *Sub = SI->getCondition();
  if (!Offset->isNullValue())
    Sub = Builder.CreateAdd(LHS: Sub, RHS: Offset, Name: Sub->getName() + ".off" );

  Value *Cmp;
  // If NumCases overflowed, then all possible values jump to the successor.
  if (NumCases->isNullValue() && !ContiguousCases->empty())
    Cmp = ConstantInt::getTrue(Context&: SI->getContext());
  else
    Cmp = Builder.CreateICmpULT(LHS: Sub, RHS: NumCases, Name: "switch" );
  BranchInst *NewBI = Builder.CreateCondBr(Cond: Cmp, True: ContiguousDest, False: OtherDest);

  // Update weight for the newly-created conditional branch.
  if (hasBranchWeightMD(I: *SI)) {
    SmallVector<uint64_t, 8> Weights;
    GetBranchWeights(TI: SI, Weights);
    if (Weights.size() == 1 + SI->getNumCases()) {
      uint64_t TrueWeight = 0;
      uint64_t FalseWeight = 0;
      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
        if (SI->getSuccessor(idx: I) == ContiguousDest)
          TrueWeight += Weights[I];
        else
          FalseWeight += Weights[I];
      }
      // Scale both weights down together until they fit in 32 bits.
      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
        TrueWeight /= 2;
        FalseWeight /= 2;
      }
      setBranchWeights(I: NewBI, TrueWeight, FalseWeight);
    }
  }

  // Prune obsolete incoming values off the successors' PHI nodes.
  // Each destination now has exactly one edge from BB, so remove all but one
  // of the entries the switch contributed.
  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
    unsigned PreviousEdges = ContiguousCases->size();
    if (ContiguousDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
  }
  for (auto BBI = OtherDest->begin(); isa<PHINode>(Val: BBI); ++BBI) {
    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
    if (OtherDest == SI->getDefaultDest())
      ++PreviousEdges;
    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
      cast<PHINode>(Val&: BBI)->removeIncomingValue(BB: SI->getParent());
  }

  // Clean up the default block - it may have phis or other instructions before
  // the unreachable terminator.
  if (!HasDefault)
    createUnreachableSwitchDefault(Switch: SI, DTU);

  auto *UnreachableDefault = SI->getDefaultDest();

  // Drop the switch.
  SI->eraseFromParent();

  if (!HasDefault && DTU)
    DTU->applyUpdates(Updates: {{DominatorTree::Delete, BB, UnreachableDefault}});

  return true;
}
5656 | |
/// Compute masked bits for the condition of a switch
/// and use it to remove dead cases. Also removes the default destination
/// when the live cases provably cover every possible condition value.
/// Returns true if the switch was changed.
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
                                     AssumptionCache *AC,
                                     const DataLayout &DL) {
  Value *Cond = SI->getCondition();
  KnownBits Known = computeKnownBits(V: Cond, DL, Depth: 0, AC, CxtI: SI);

  // We can also eliminate cases by determining that their values are outside of
  // the limited range of the condition based on how many significant (non-sign)
  // bits are in the condition value.
  unsigned MaxSignificantBitsInCond =
      ComputeMaxSignificantBits(Op: Cond, DL, Depth: 0, AC, CxtI: SI);

  // Gather dead cases.
  SmallVector<ConstantInt *, 8> DeadCases;
  // Per-successor live-case counts (only needed for DT maintenance) so we
  // can tell below which successor edges disappear entirely.
  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
  SmallVector<BasicBlock *, 8> UniqueSuccessors;
  for (const auto &Case : SI->cases()) {
    auto *Successor = Case.getCaseSuccessor();
    if (DTU) {
      if (!NumPerSuccessorCases.count(Val: Successor))
        UniqueSuccessors.push_back(Elt: Successor);
      ++NumPerSuccessorCases[Successor];
    }
    const APInt &CaseVal = Case.getCaseValue()->getValue();
    // A case is dead if its value contradicts the known-zero/known-one bits
    // of the condition, or needs more significant bits than the condition
    // can have.
    if (Known.Zero.intersects(RHS: CaseVal) || !Known.One.isSubsetOf(RHS: CaseVal) ||
        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
      DeadCases.push_back(Elt: Case.getCaseValue());
      if (DTU)
        --NumPerSuccessorCases[Successor];
      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
                        << " is dead.\n" );
    }
  }

  // If we can prove that the cases must cover all possible values, the
  // default destination becomes dead and we can remove it. If we know some
  // of the bits in the value, we can use that to more precisely compute the
  // number of possible unique case values.
  bool HasDefault =
      !isa<UnreachableInst>(Val: SI->getDefaultDest()->getFirstNonPHIOrDbg());
  const unsigned NumUnknownBits =
      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
  assert(NumUnknownBits <= Known.getBitWidth());
  if (HasDefault && DeadCases.empty() &&
      NumUnknownBits < 64 /* avoid overflow */ &&
      SI->getNumCases() == (1ULL << NumUnknownBits)) {
    createUnreachableSwitchDefault(Switch: SI, DTU);
    return true;
  }

  if (DeadCases.empty())
    return false;

  // Remove each dead case, keeping branch-weight metadata in sync via the
  // wrapper.
  SwitchInstProfUpdateWrapper SIW(*SI);
  for (ConstantInt *DeadCase : DeadCases) {
    SwitchInst::CaseIt CaseI = SI->findCaseValue(C: DeadCase);
    assert(CaseI != SI->case_default() &&
           "Case was not found. Probably mistake in DeadCases forming." );
    // Prune unused values from PHI nodes.
    CaseI->getCaseSuccessor()->removePredecessor(Pred: SI->getParent());
    SIW.removeCase(I: CaseI);
  }

  // Tell the dominator tree about successors that lost their last case edge.
  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    for (auto *Successor : UniqueSuccessors)
      if (NumPerSuccessorCases[Successor] == 0)
        Updates.push_back(x: {DominatorTree::Delete, SI->getParent(), Successor});
    DTU->applyUpdates(Updates);
  }

  return true;
}
5732 | |
5733 | /// If BB would be eligible for simplification by |
5734 | /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated |
5735 | /// by an unconditional branch), look at the phi node for BB in the successor |
5736 | /// block and see if the incoming value is equal to CaseValue. If so, return |
5737 | /// the phi node, and set PhiIndex to BB's index in the phi node. |
5738 | static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue, |
5739 | BasicBlock *BB, int *PhiIndex) { |
5740 | if (BB->getFirstNonPHIOrDbg() != BB->getTerminator()) |
5741 | return nullptr; // BB must be empty to be a candidate for simplification. |
5742 | if (!BB->getSinglePredecessor()) |
5743 | return nullptr; // BB must be dominated by the switch. |
5744 | |
5745 | BranchInst *Branch = dyn_cast<BranchInst>(Val: BB->getTerminator()); |
5746 | if (!Branch || !Branch->isUnconditional()) |
5747 | return nullptr; // Terminator must be unconditional branch. |
5748 | |
5749 | BasicBlock *Succ = Branch->getSuccessor(i: 0); |
5750 | |
5751 | for (PHINode &PHI : Succ->phis()) { |
5752 | int Idx = PHI.getBasicBlockIndex(BB); |
5753 | assert(Idx >= 0 && "PHI has no entry for predecessor?" ); |
5754 | |
5755 | Value *InValue = PHI.getIncomingValue(i: Idx); |
5756 | if (InValue != CaseValue) |
5757 | continue; |
5758 | |
5759 | *PhiIndex = Idx; |
5760 | return &PHI; |
5761 | } |
5762 | |
5763 | return nullptr; |
5764 | } |
5765 | |
5766 | /// Try to forward the condition of a switch instruction to a phi node |
5767 | /// dominated by the switch, if that would mean that some of the destination |
5768 | /// blocks of the switch can be folded away. Return true if a change is made. |
5769 | static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { |
5770 | using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>; |
5771 | |
5772 | ForwardingNodesMap ForwardingNodes; |
5773 | BasicBlock *SwitchBlock = SI->getParent(); |
5774 | bool Changed = false; |
5775 | for (const auto &Case : SI->cases()) { |
5776 | ConstantInt *CaseValue = Case.getCaseValue(); |
5777 | BasicBlock *CaseDest = Case.getCaseSuccessor(); |
5778 | |
5779 | // Replace phi operands in successor blocks that are using the constant case |
5780 | // value rather than the switch condition variable: |
5781 | // switchbb: |
5782 | // switch i32 %x, label %default [ |
5783 | // i32 17, label %succ |
5784 | // ... |
5785 | // succ: |
5786 | // %r = phi i32 ... [ 17, %switchbb ] ... |
5787 | // --> |
5788 | // %r = phi i32 ... [ %x, %switchbb ] ... |
5789 | |
5790 | for (PHINode &Phi : CaseDest->phis()) { |
5791 | // This only works if there is exactly 1 incoming edge from the switch to |
5792 | // a phi. If there is >1, that means multiple cases of the switch map to 1 |
5793 | // value in the phi, and that phi value is not the switch condition. Thus, |
5794 | // this transform would not make sense (the phi would be invalid because |
5795 | // a phi can't have different incoming values from the same block). |
5796 | int SwitchBBIdx = Phi.getBasicBlockIndex(BB: SwitchBlock); |
5797 | if (Phi.getIncomingValue(i: SwitchBBIdx) == CaseValue && |
5798 | count(Range: Phi.blocks(), Element: SwitchBlock) == 1) { |
5799 | Phi.setIncomingValue(i: SwitchBBIdx, V: SI->getCondition()); |
5800 | Changed = true; |
5801 | } |
5802 | } |
5803 | |
5804 | // Collect phi nodes that are indirectly using this switch's case constants. |
5805 | int PhiIdx; |
5806 | if (auto *Phi = FindPHIForConditionForwarding(CaseValue, BB: CaseDest, PhiIndex: &PhiIdx)) |
5807 | ForwardingNodes[Phi].push_back(Elt: PhiIdx); |
5808 | } |
5809 | |
5810 | for (auto &ForwardingNode : ForwardingNodes) { |
5811 | PHINode *Phi = ForwardingNode.first; |
5812 | SmallVectorImpl<int> &Indexes = ForwardingNode.second; |
5813 | if (Indexes.size() < 2) |
5814 | continue; |
5815 | |
5816 | for (int Index : Indexes) |
5817 | Phi->setIncomingValue(i: Index, V: SI->getCondition()); |
5818 | Changed = true; |
5819 | } |
5820 | |
5821 | return Changed; |
5822 | } |
5823 | |
5824 | /// Return true if the backend will be able to handle |
5825 | /// initializing an array of constants like C. |
5826 | static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) { |
5827 | if (C->isThreadDependent()) |
5828 | return false; |
5829 | if (C->isDLLImportDependent()) |
5830 | return false; |
5831 | |
5832 | if (!isa<ConstantFP>(Val: C) && !isa<ConstantInt>(Val: C) && |
5833 | !isa<ConstantPointerNull>(Val: C) && !isa<GlobalValue>(Val: C) && |
5834 | !isa<UndefValue>(Val: C) && !isa<ConstantExpr>(Val: C)) |
5835 | return false; |
5836 | |
5837 | if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Val: C)) { |
5838 | // Pointer casts and in-bounds GEPs will not prohibit the backend from |
5839 | // materializing the array of constants. |
5840 | Constant *StrippedC = cast<Constant>(Val: CE->stripInBoundsConstantOffsets()); |
5841 | if (StrippedC == C || !ValidLookupTableConstant(C: StrippedC, TTI)) |
5842 | return false; |
5843 | } |
5844 | |
5845 | if (!TTI.shouldBuildLookupTablesForConstant(C)) |
5846 | return false; |
5847 | |
5848 | return true; |
5849 | } |
5850 | |
5851 | /// If V is a Constant, return it. Otherwise, try to look up |
5852 | /// its constant value in ConstantPool, returning 0 if it's not there. |
5853 | static Constant * |
5854 | LookupConstant(Value *V, |
5855 | const SmallDenseMap<Value *, Constant *> &ConstantPool) { |
5856 | if (Constant *C = dyn_cast<Constant>(Val: V)) |
5857 | return C; |
5858 | return ConstantPool.lookup(Val: V); |
5859 | } |
5860 | |
5861 | /// Try to fold instruction I into a constant. This works for |
5862 | /// simple instructions such as binary operations where both operands are |
5863 | /// constant or can be replaced by constants from the ConstantPool. Returns the |
5864 | /// resulting constant on success, 0 otherwise. |
5865 | static Constant * |
5866 | ConstantFold(Instruction *I, const DataLayout &DL, |
5867 | const SmallDenseMap<Value *, Constant *> &ConstantPool) { |
5868 | if (SelectInst *Select = dyn_cast<SelectInst>(Val: I)) { |
5869 | Constant *A = LookupConstant(V: Select->getCondition(), ConstantPool); |
5870 | if (!A) |
5871 | return nullptr; |
5872 | if (A->isAllOnesValue()) |
5873 | return LookupConstant(V: Select->getTrueValue(), ConstantPool); |
5874 | if (A->isNullValue()) |
5875 | return LookupConstant(V: Select->getFalseValue(), ConstantPool); |
5876 | return nullptr; |
5877 | } |
5878 | |
5879 | SmallVector<Constant *, 4> COps; |
5880 | for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) { |
5881 | if (Constant *A = LookupConstant(V: I->getOperand(i: N), ConstantPool)) |
5882 | COps.push_back(Elt: A); |
5883 | else |
5884 | return nullptr; |
5885 | } |
5886 | |
5887 | return ConstantFoldInstOperands(I, Ops: COps, DL); |
5888 | } |
5889 | |
/// Try to determine the resulting constant values in phi nodes
/// at the common destination basic block, *CommonDest, for one of the case
/// destinations CaseDest corresponding to value CaseVal (0 for the default
/// case), of a switch instruction SI.
///
/// On entry *CommonDest may be null; the first call fills it in and later
/// calls require the same destination. On success, Res is populated with one
/// (phi, constant) pair per phi in *CommonDest that has an entry for this
/// case's incoming edge. Returns false if any phi value is non-constant,
/// unsupported for a lookup table, or if no phi produced a result.
static bool
getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
               BasicBlock **CommonDest,
               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
               const DataLayout &DL, const TargetTransformInfo &TTI) {
  // The block from which we enter the common destination.
  BasicBlock *Pred = SI->getParent();

  // If CaseDest is empty except for some side-effect free instructions through
  // which we can constant-propagate the CaseVal, continue to its successor.
  // Seed the pool with "switch condition == CaseVal" for this case.
  SmallDenseMap<Value *, Constant *> ConstantPool;
  ConstantPool.insert(KV: std::make_pair(x: SI->getCondition(), y&: CaseVal));
  for (Instruction &I : CaseDest->instructionsWithoutDebug(SkipPseudoOp: false)) {
    if (I.isTerminator()) {
      // If the terminator is a simple branch, continue to the next block.
      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
        return false;
      Pred = CaseDest;
      CaseDest = I.getSuccessor(Idx: 0);
    } else if (Constant *C = ConstantFold(I: &I, DL, ConstantPool)) {
      // Instruction is side-effect free and constant.

      // If the instruction has uses outside this block or a phi node slot for
      // the block, it is not safe to bypass the instruction since it would then
      // no longer dominate all its uses.
      for (auto &Use : I.uses()) {
        User *User = Use.getUser();
        if (Instruction *I = dyn_cast<Instruction>(Val: User))
          if (I->getParent() == CaseDest)
            continue;
        if (PHINode *Phi = dyn_cast<PHINode>(Val: User))
          if (Phi->getIncomingBlock(U: Use) == CaseDest)
            continue;
        return false;
      }

      ConstantPool.insert(KV: std::make_pair(x: &I, y&: C));
    } else {
      // First non-foldable, non-terminator instruction: stop scanning and
      // treat the current block as the candidate destination.
      break;
    }
  }

  // If we did not have a CommonDest before, use the current one.
  if (!*CommonDest)
    *CommonDest = CaseDest;
  // If the destination isn't the common one, abort.
  if (CaseDest != *CommonDest)
    return false;

  // Get the values for this case from phi nodes in the destination block.
  for (PHINode &PHI : (*CommonDest)->phis()) {
    int Idx = PHI.getBasicBlockIndex(BB: Pred);
    if (Idx == -1)
      continue;

    Constant *ConstVal =
        LookupConstant(V: PHI.getIncomingValue(i: Idx), ConstantPool);
    if (!ConstVal)
      return false;

    // Be conservative about which kinds of constants we support.
    if (!ValidLookupTableConstant(C: ConstVal, TTI))
      return false;

    Res.push_back(Elt: std::make_pair(x: &PHI, y&: ConstVal));
  }

  return Res.size() > 0;
}
5963 | |
5964 | // Helper function used to add CaseVal to the list of cases that generate |
5965 | // Result. Returns the updated number of cases that generate this result. |
5966 | static size_t mapCaseToResult(ConstantInt *CaseVal, |
5967 | SwitchCaseResultVectorTy &UniqueResults, |
5968 | Constant *Result) { |
5969 | for (auto &I : UniqueResults) { |
5970 | if (I.first == Result) { |
5971 | I.second.push_back(Elt: CaseVal); |
5972 | return I.second.size(); |
5973 | } |
5974 | } |
5975 | UniqueResults.push_back( |
5976 | Elt: std::make_pair(x&: Result, y: SmallVector<ConstantInt *, 4>(1, CaseVal))); |
5977 | return 1; |
5978 | } |
5979 | |
// Helper function that initializes a map containing
// results for the PHI node of the common destination block for a switch
// instruction. Returns false if multiple PHI nodes have been found or if
// there is not a common destination block for the switch.
//
// Out-params on success: PHI is the single phi being initialized; CommonDest
// is the shared destination block; UniqueResults maps each distinct result
// constant to the case values producing it; DefaultResult is the default
// case's result (nullptr only when the default destination is unreachable).
// MaxUniqueResults bounds the number of distinct result constants.
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
                                  BasicBlock *&CommonDest,
                                  SwitchCaseResultVectorTy &UniqueResults,
                                  Constant *&DefaultResult,
                                  const DataLayout &DL,
                                  const TargetTransformInfo &TTI,
                                  uintptr_t MaxUniqueResults) {
  for (const auto &I : SI->cases()) {
    ConstantInt *CaseVal = I.getCaseValue();

    // Resulting value at phi nodes for this case value.
    SwitchCaseResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: I.getCaseSuccessor(), CommonDest: &CommonDest, Res&: Results,
                        DL, TTI))
      return false;

    // Only one value per case is permitted.
    if (Results.size() > 1)
      return false;

    // Add the case->result mapping to UniqueResults.
    const size_t NumCasesForResult =
        mapCaseToResult(CaseVal, UniqueResults, Result: Results.begin()->second);

    // Early out if there are too many cases for this result.
    if (NumCasesForResult > MaxSwitchCasesPerResult)
      return false;

    // Early out if there are too many unique results.
    if (UniqueResults.size() > MaxUniqueResults)
      return false;

    // Check the PHI consistency: every case must feed the same phi node.
    if (!PHI)
      PHI = Results[0].first;
    else if (PHI != Results[0].first)
      return false;
  }
  // Find the default result value.
  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
  BasicBlock *DefaultDest = SI->getDefaultDest();
  getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest, Res&: DefaultResults,
                 DL, TTI);
  // If the default value is not found abort unless the default destination
  // is unreachable.
  DefaultResult =
      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
  if ((!DefaultResult &&
       !isa<UnreachableInst>(Val: DefaultDest->getFirstNonPHIOrDbg())))
    return false;

  return true;
}
6037 | |
// Helper function that checks if it is possible to transform a switch with only
// two cases (or two cases + default) that produces a result into a select.
// TODO: Handle switches with more than 2 cases that map to the same result.
//
// Returns the select (chain) computing the common phi value, emitted through
// Builder at its current insert point, or nullptr if no supported pattern
// matches.
static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
                                 Constant *DefaultResult, Value *Condition,
                                 IRBuilder<> &Builder) {
  // If we are selecting between only two cases transform into a simple
  // select or a two-way select if default is possible.
  // Example:
  // switch (a) {                  %0 = icmp eq i32 %a, 10
  // case 10: return 42;           %1 = select i1 %0, i32 42, i32 4
  // case 20: return 2;   ---->    %2 = icmp eq i32 %a, 20
  // default: return 4;            %3 = select i1 %2, i32 2, i32 %1
  // }
  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
      ResultVector[1].second.size() == 1) {
    ConstantInt *FirstCase = ResultVector[0].second[0];
    ConstantInt *SecondCase = ResultVector[1].second[0];
    Value *SelectValue = ResultVector[1].first;
    if (DefaultResult) {
      Value *ValueCompare =
          Builder.CreateICmpEQ(LHS: Condition, RHS: SecondCase, Name: "switch.selectcmp" );
      SelectValue = Builder.CreateSelect(C: ValueCompare, True: ResultVector[1].first,
                                         False: DefaultResult, Name: "switch.select" );
    }
    Value *ValueCompare =
        Builder.CreateICmpEQ(LHS: Condition, RHS: FirstCase, Name: "switch.selectcmp" );
    return Builder.CreateSelect(C: ValueCompare, True: ResultVector[0].first,
                                False: SelectValue, Name: "switch.select" );
  }

  // Handle the degenerate case where two cases have the same result value.
  if (ResultVector.size() == 1 && DefaultResult) {
    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
    unsigned CaseCount = CaseValues.size();
    // n bits group cases map to the same result:
    // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
    // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
    if (isPowerOf2_32(Value: CaseCount)) {
      ConstantInt *MinCaseVal = CaseValues[0];
      // Find minimal value.
      for (auto *Case : CaseValues)
        if (Case->getValue().slt(RHS: MinCaseVal->getValue()))
          MinCaseVal = Case;

      // Mark the bits case number touched.
      APInt BitMask = APInt::getZero(numBits: MinCaseVal->getBitWidth());
      for (auto *Case : CaseValues)
        BitMask |= (Case->getValue() - MinCaseVal->getValue());

      // Check if cases with the same result can cover all number
      // in touched bits: 2^k cases must exactly enumerate all values of the
      // k touched bits, so masking those bits off compares against zero.
      if (BitMask.popcount() == Log2_32(Value: CaseCount)) {
        if (!MinCaseVal->isNullValue())
          Condition = Builder.CreateSub(LHS: Condition, RHS: MinCaseVal);
        Value *And = Builder.CreateAnd(LHS: Condition, RHS: ~BitMask, Name: "switch.and" );
        Value *Cmp = Builder.CreateICmpEQ(
            LHS: And, RHS: Constant::getNullValue(Ty: And->getType()), Name: "switch.selectcmp" );
        return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
      }
    }

    // Handle the degenerate case where two cases have the same value.
    if (CaseValues.size() == 2) {
      Value *Cmp1 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[0],
                                         Name: "switch.selectcmp.case1" );
      Value *Cmp2 = Builder.CreateICmpEQ(LHS: Condition, RHS: CaseValues[1],
                                         Name: "switch.selectcmp.case2" );
      Value *Cmp = Builder.CreateOr(LHS: Cmp1, RHS: Cmp2, Name: "switch.selectcmp" );
      return Builder.CreateSelect(C: Cmp, True: ResultVector[0].first, False: DefaultResult);
    }
  }

  return nullptr;
}
6114 | |
// Helper function to cleanup a switch instruction that has been converted into
// a select, fixing up PHI nodes and basic blocks.
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
                                        Value *SelectValue,
                                        IRBuilder<> &Builder,
                                        DomTreeUpdater *DTU) {
  std::vector<DominatorTree::UpdateType> Updates;

  BasicBlock *SelectBB = SI->getParent();
  BasicBlock *DestBB = PHI->getParent();

  // Record the Insert update only if the edge does not already exist (the
  // switch may already have DestBB as one of its successors).
  if (DTU && !is_contained(Range: predecessors(BB: DestBB), Element: SelectBB))
    Updates.push_back(x: {DominatorTree::Insert, SelectBB, DestBB});
  Builder.CreateBr(Dest: DestBB);

  // Remove the switch.

  // Collapse all of SelectBB's incoming phi entries into a single entry
  // carrying the computed select value.
  PHI->removeIncomingValueIf(
      Predicate: [&](unsigned Idx) { return PHI->getIncomingBlock(i: Idx) == SelectBB; });
  PHI->addIncoming(V: SelectValue, BB: SelectBB);

  // Detach SelectBB from every former switch successor other than DestBB,
  // deduplicating DTU edge deletions for successors reached by several cases.
  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    if (Succ == DestBB)
      continue;
    Succ->removePredecessor(Pred: SelectBB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, SelectBB, Succ});
  }
  SI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);
}
6150 | |
6151 | /// If a switch is only used to initialize one or more phi nodes in a common |
6152 | /// successor block with only two different constant values, try to replace the |
6153 | /// switch with a select. Returns true if the fold was made. |
6154 | static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, |
6155 | DomTreeUpdater *DTU, const DataLayout &DL, |
6156 | const TargetTransformInfo &TTI) { |
6157 | Value *const Cond = SI->getCondition(); |
6158 | PHINode *PHI = nullptr; |
6159 | BasicBlock *CommonDest = nullptr; |
6160 | Constant *DefaultResult; |
6161 | SwitchCaseResultVectorTy UniqueResults; |
6162 | // Collect all the cases that will deliver the same value from the switch. |
6163 | if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, |
6164 | DL, TTI, /*MaxUniqueResults*/ 2)) |
6165 | return false; |
6166 | |
6167 | assert(PHI != nullptr && "PHI for value select not found" ); |
6168 | Builder.SetInsertPoint(SI); |
6169 | Value *SelectValue = |
6170 | foldSwitchToSelect(ResultVector: UniqueResults, DefaultResult, Condition: Cond, Builder); |
6171 | if (!SelectValue) |
6172 | return false; |
6173 | |
6174 | removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU); |
6175 | return true; |
6176 | } |
6177 | |
namespace {

/// This class represents a lookup table that can be used to replace a switch.
/// The constructor analyzes the case values and picks the cheapest
/// representation (see the Kind enum below); BuildLookup then emits the
/// matching IR to fetch a value.
class SwitchLookupTable {
public:
  /// Create a lookup table to use as a switch replacement with the contents
  /// of Values, using DefaultValue to fill any holes in the table.
  /// DefaultValue may be null only when Values covers all TableSize slots.
  SwitchLookupTable(
      Module &M, uint64_t TableSize, ConstantInt *Offset,
      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);

  /// Build instructions with Builder to retrieve the value at
  /// the position given by Index in the lookup table.
  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);

  /// Return true if a table with TableSize elements of
  /// type ElementType would fit in a target-legal register.
  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
                                 Type *ElementType);

private:
  // Depending on the contents of the table, it can be represented in
  // different ways.
  enum {
    // For tables where each element contains the same value, we just have to
    // store that single value and return it for each lookup.
    SingleValueKind,

    // For tables where there is a linear relationship between table index
    // and values. We calculate the result with a simple multiplication
    // and addition instead of a table lookup.
    LinearMapKind,

    // For small tables with integer elements, we can pack them into a bitmap
    // that fits into a target-legal register. Values are retrieved by
    // shift and mask operations.
    BitMapKind,

    // The table is stored as an array of values. Values are retrieved by load
    // instructions from the table.
    ArrayKind
  } Kind;

  // For SingleValueKind, this is the single value.
  Constant *SingleValue = nullptr;

  // For BitMapKind, this is the bitmap.
  ConstantInt *BitMap = nullptr;
  IntegerType *BitMapElementTy = nullptr;

  // For LinearMapKind, these are the constants used to derive the value.
  ConstantInt *LinearOffset = nullptr;
  ConstantInt *LinearMultiplier = nullptr;
  // True when the linear map may wrap, which forbids nsw on the emitted
  // add/mul (see BuildLookup).
  bool LinearMapValWrapped = false;

  // For ArrayKind, this is the array.
  GlobalVariable *Array = nullptr;
};

} // end anonymous namespace
6239 | |
SwitchLookupTable::SwitchLookupTable(
    Module &M, uint64_t TableSize, ConstantInt *Offset,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
  assert(Values.size() && "Can't build lookup table without values!" );
  assert(TableSize >= Values.size() && "Can't fit values in table!" );

  // If all values in the table are equal, this is that value.
  SingleValue = Values.begin()->second;

  Type *ValueType = Values.begin()->second->getType();

  // Build up the table contents. Each case value is placed at its distance
  // from Offset (the smallest case value).
  SmallVector<Constant *, 64> TableContents(TableSize);
  for (size_t I = 0, E = Values.size(); I != E; ++I) {
    ConstantInt *CaseVal = Values[I].first;
    Constant *CaseRes = Values[I].second;
    assert(CaseRes->getType() == ValueType);

    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
    TableContents[Idx] = CaseRes;

    if (CaseRes != SingleValue)
      SingleValue = nullptr;
  }

  // Fill in any holes in the table with the default result.
  if (Values.size() < TableSize) {
    assert(DefaultValue &&
           "Need a default value to fill the lookup table holes." );
    assert(DefaultValue->getType() == ValueType);
    for (uint64_t I = 0; I < TableSize; ++I) {
      if (!TableContents[I])
        TableContents[I] = DefaultValue;
    }

    if (DefaultValue != SingleValue)
      SingleValue = nullptr;
  }

  // If each element in the table contains the same value, we only need to store
  // that single value.
  if (SingleValue) {
    Kind = SingleValueKind;
    return;
  }

  // Check if we can derive the value with a linear transformation from the
  // table index, i.e. value = LinearOffset + Index * LinearMultiplier.
  if (isa<IntegerType>(Val: ValueType)) {
    bool LinearMappingPossible = true;
    APInt PrevVal;
    APInt DistToPrev;
    // When linear map is monotonic and signed overflow doesn't happen on
    // maximum index, we can attach nsw on Add and Mul.
    bool NonMonotonic = false;
    assert(TableSize >= 2 && "Should be a SingleValue table." );
    // Check if there is the same distance between two consecutive values.
    for (uint64_t I = 0; I < TableSize; ++I) {
      ConstantInt *ConstVal = dyn_cast<ConstantInt>(Val: TableContents[I]);
      if (!ConstVal) {
        // This is an undef. We could deal with it, but undefs in lookup tables
        // are very seldom. It's probably not worth the additional complexity.
        LinearMappingPossible = false;
        break;
      }
      const APInt &Val = ConstVal->getValue();
      if (I != 0) {
        APInt Dist = Val - PrevVal;
        if (I == 1) {
          DistToPrev = Dist;
        } else if (Dist != DistToPrev) {
          LinearMappingPossible = false;
          break;
        }
        // A signed-order reversal between consecutive values means the map
        // wraps somewhere, so nsw would be invalid.
        NonMonotonic |=
            Dist.isStrictlyPositive() ? Val.sle(RHS: PrevVal) : Val.sgt(RHS: PrevVal);
      }
      PrevVal = Val;
    }
    if (LinearMappingPossible) {
      LinearOffset = cast<ConstantInt>(Val: TableContents[0]);
      LinearMultiplier = ConstantInt::get(Context&: M.getContext(), V: DistToPrev);
      // Also check whether Multiplier * (TableSize - 1) itself overflows.
      bool MayWrap = false;
      APInt M = LinearMultiplier->getValue();
      (void)M.smul_ov(RHS: APInt(M.getBitWidth(), TableSize - 1), Overflow&: MayWrap);
      LinearMapValWrapped = NonMonotonic || MayWrap;
      Kind = LinearMapKind;
      ++NumLinearMaps;
      return;
    }
  }

  // If the type is integer and the table fits in a register, build a bitmap.
  if (WouldFitInRegister(DL, TableSize, ElementType: ValueType)) {
    IntegerType *IT = cast<IntegerType>(Val: ValueType);
    APInt TableInt(TableSize * IT->getBitWidth(), 0);
    // Pack from the highest index down so element 0 lands in the low bits.
    for (uint64_t I = TableSize; I > 0; --I) {
      TableInt <<= IT->getBitWidth();
      // Insert values into the bitmap. Undef values are set to zero.
      if (!isa<UndefValue>(Val: TableContents[I - 1])) {
        ConstantInt *Val = cast<ConstantInt>(Val: TableContents[I - 1]);
        TableInt |= Val->getValue().zext(width: TableInt.getBitWidth());
      }
    }
    BitMap = ConstantInt::get(Context&: M.getContext(), V: TableInt);
    BitMapElementTy = IT;
    Kind = BitMapKind;
    ++NumBitMaps;
    return;
  }

  // Store the table in an array.
  ArrayType *ArrayTy = ArrayType::get(ElementType: ValueType, NumElements: TableSize);
  Constant *Initializer = ConstantArray::get(T: ArrayTy, V: TableContents);

  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
                             GlobalVariable::PrivateLinkage, Initializer,
                             "switch.table." + FuncName);
  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
  // Set the alignment to that of an array items. We will be only loading one
  // value out of it.
  Array->setAlignment(DL.getPrefTypeAlign(Ty: ValueType));
  Kind = ArrayKind;
}
6365 | |
// Emit the IR that fetches the table value for Index, according to the
// representation chosen by the constructor.
Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
  switch (Kind) {
  case SingleValueKind:
    // Every slot holds the same constant; no code needed.
    return SingleValue;
  case LinearMapKind: {
    // Derive the result value from the input value:
    //   result = LinearOffset + Index * LinearMultiplier.
    Value *Result = Builder.CreateIntCast(V: Index, DestTy: LinearMultiplier->getType(),
                                          isSigned: false, Name: "switch.idx.cast" );
    if (!LinearMultiplier->isOne())
      Result = Builder.CreateMul(LHS: Result, RHS: LinearMultiplier, Name: "switch.idx.mult" ,
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);

    if (!LinearOffset->isZero())
      Result = Builder.CreateAdd(LHS: Result, RHS: LinearOffset, Name: "switch.offset" ,
                                 /*HasNUW = */ false,
                                 /*HasNSW = */ !LinearMapValWrapped);
    return Result;
  }
  case BitMapKind: {
    // Type of the bitmap (e.g. i59).
    IntegerType *MapTy = BitMap->getIntegerType();

    // Cast Index to the same type as the bitmap.
    // Note: The Index is <= the number of elements in the table, so
    // truncating it to the width of the bitmask is safe.
    Value *ShiftAmt = Builder.CreateZExtOrTrunc(V: Index, DestTy: MapTy, Name: "switch.cast" );

    // Multiply the shift amount by the element width. NUW/NSW can always be
    // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
    // BitMap's bit width.
    ShiftAmt = Builder.CreateMul(
        LHS: ShiftAmt, RHS: ConstantInt::get(Ty: MapTy, V: BitMapElementTy->getBitWidth()),
        Name: "switch.shiftamt" ,/*HasNUW =*/true,/*HasNSW =*/true);

    // Shift down.
    Value *DownShifted =
        Builder.CreateLShr(LHS: BitMap, RHS: ShiftAmt, Name: "switch.downshift" );
    // Mask off.
    return Builder.CreateTrunc(V: DownShifted, DestTy: BitMapElementTy, Name: "switch.masked" );
  }
  case ArrayKind: {
    // Make sure the table index will not overflow when treated as signed.
    IntegerType *IT = cast<IntegerType>(Val: Index->getType());
    uint64_t TableSize =
        Array->getInitializer()->getType()->getArrayNumElements();
    if (TableSize > (1ULL << std::min(a: IT->getBitWidth() - 1, b: 63u)))
      Index = Builder.CreateZExt(
          V: Index, DestTy: IntegerType::get(C&: IT->getContext(), NumBits: IT->getBitWidth() + 1),
          Name: "switch.tableidx.zext" );

    // GEP into the array and load the element.
    Value *GEPIndices[] = {Builder.getInt32(C: 0), Index};
    Value *GEP = Builder.CreateInBoundsGEP(Ty: Array->getValueType(), Ptr: Array,
                                           IdxList: GEPIndices, Name: "switch.gep" );
    return Builder.CreateLoad(
        Ty: cast<ArrayType>(Val: Array->getValueType())->getElementType(), Ptr: GEP,
        Name: "switch.load" );
  }
  }
  llvm_unreachable("Unknown lookup table kind!" );
}
6427 | |
6428 | bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL, |
6429 | uint64_t TableSize, |
6430 | Type *ElementType) { |
6431 | auto *IT = dyn_cast<IntegerType>(Val: ElementType); |
6432 | if (!IT) |
6433 | return false; |
6434 | // FIXME: If the type is wider than it needs to be, e.g. i8 but all values |
6435 | // are <= 15, we could try to narrow the type. |
6436 | |
6437 | // Avoid overflow, fitsInLegalInteger uses unsigned int for the width. |
6438 | if (TableSize >= UINT_MAX / IT->getBitWidth()) |
6439 | return false; |
6440 | return DL.fitsInLegalInteger(Width: TableSize * IT->getBitWidth()); |
6441 | } |
6442 | |
6443 | static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, |
6444 | const DataLayout &DL) { |
6445 | // Allow any legal type. |
6446 | if (TTI.isTypeLegal(Ty)) |
6447 | return true; |
6448 | |
6449 | auto *IT = dyn_cast<IntegerType>(Val: Ty); |
6450 | if (!IT) |
6451 | return false; |
6452 | |
6453 | // Also allow power of 2 integer types that have at least 8 bits and fit in |
6454 | // a register. These types are common in frontend languages and targets |
6455 | // usually support loads of these types. |
6456 | // TODO: We could relax this to any integer that fits in a register and rely |
6457 | // on ABI alignment and padding in the table to allow the load to be widened. |
6458 | // Or we could widen the constants and truncate the load. |
6459 | unsigned BitWidth = IT->getBitWidth(); |
6460 | return BitWidth >= 8 && isPowerOf2_32(Value: BitWidth) && |
6461 | DL.fitsInLegalInteger(Width: IT->getBitWidth()); |
6462 | } |
6463 | |
// Return true if NumCases cases spread over CaseRange values are dense
// enough to justify a table.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;

  // Huge ranges would overflow the products below; treat them as sparse.
  if (CaseRange >= UINT64_MAX / 100)
    return false;

  // Equivalent to: NumCases / CaseRange >= MinDensity%, in integer math.
  return MinDensity * CaseRange <= 100 * NumCases;
}
6475 | |
6476 | static bool isSwitchDense(ArrayRef<int64_t> Values) { |
6477 | uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front(); |
6478 | uint64_t Range = Diff + 1; |
6479 | if (Range < Diff) |
6480 | return false; // Overflow. |
6481 | |
6482 | return isSwitchDense(NumCases: Values.size(), CaseRange: Range); |
6483 | } |
6484 | |
6485 | /// Determine whether a lookup table should be built for this switch, based on |
6486 | /// the number of cases, size of the table, and the types of the results. |
6487 | // TODO: We could support larger than legal types by limiting based on the |
6488 | // number of loads required and/or table size. If the constants are small we |
6489 | // could use smaller table entries and extend after the load. |
6490 | static bool |
6491 | ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, |
6492 | const TargetTransformInfo &TTI, const DataLayout &DL, |
6493 | const SmallDenseMap<PHINode *, Type *> &ResultTypes) { |
6494 | if (SI->getNumCases() > TableSize) |
6495 | return false; // TableSize overflowed. |
6496 | |
6497 | bool AllTablesFitInRegister = true; |
6498 | bool HasIllegalType = false; |
6499 | for (const auto &I : ResultTypes) { |
6500 | Type *Ty = I.second; |
6501 | |
6502 | // Saturate this flag to true. |
6503 | HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL); |
6504 | |
6505 | // Saturate this flag to false. |
6506 | AllTablesFitInRegister = |
6507 | AllTablesFitInRegister && |
6508 | SwitchLookupTable::WouldFitInRegister(DL, TableSize, ElementType: Ty); |
6509 | |
6510 | // If both flags saturate, we're done. NOTE: This *only* works with |
6511 | // saturating flags, and all flags have to saturate first due to the |
6512 | // non-deterministic behavior of iterating over a dense map. |
6513 | if (HasIllegalType && !AllTablesFitInRegister) |
6514 | break; |
6515 | } |
6516 | |
6517 | // If each table would fit in a register, we should build it anyway. |
6518 | if (AllTablesFitInRegister) |
6519 | return true; |
6520 | |
6521 | // Don't build a table that doesn't fit in-register if it has illegal types. |
6522 | if (HasIllegalType) |
6523 | return false; |
6524 | |
6525 | return isSwitchDense(NumCases: SI->getNumCases(), CaseRange: TableSize); |
6526 | } |
6527 | |
6528 | static bool ShouldUseSwitchConditionAsTableIndex( |
6529 | ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, |
6530 | bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes, |
6531 | const DataLayout &DL, const TargetTransformInfo &TTI) { |
6532 | if (MinCaseVal.isNullValue()) |
6533 | return true; |
6534 | if (MinCaseVal.isNegative() || |
6535 | MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() || |
6536 | !HasDefaultResults) |
6537 | return false; |
6538 | return all_of(Range: ResultTypes, P: [&](const auto &KV) { |
6539 | return SwitchLookupTable::WouldFitInRegister( |
6540 | DL, TableSize: MaxCaseVal.getLimitedValue() + 1 /* TableSize */, |
6541 | ElementType: KV.second /* ResultType */); |
6542 | }); |
6543 | } |
6544 | |
6545 | /// Try to reuse the switch table index compare. Following pattern: |
6546 | /// \code |
6547 | /// if (idx < tablesize) |
6548 | /// r = table[idx]; // table does not contain default_value |
6549 | /// else |
6550 | /// r = default_value; |
6551 | /// if (r != default_value) |
6552 | /// ... |
6553 | /// \endcode |
6554 | /// Is optimized to: |
6555 | /// \code |
6556 | /// cond = idx < tablesize; |
6557 | /// if (cond) |
6558 | /// r = table[idx]; |
6559 | /// else |
6560 | /// r = default_value; |
6561 | /// if (cond) |
6562 | /// ... |
6563 | /// \endcode |
6564 | /// Jump threading will then eliminate the second if(cond). |
static void reuseTableCompare(
    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
    Constant *DefaultValue,
    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
  // Only an icmp user of the phi can be folded into the range check.
  ICmpInst *CmpInst = dyn_cast<ICmpInst>(Val: PhiUser);
  if (!CmpInst)
    return;

  // We require that the compare is in the same block as the phi so that jump
  // threading can do its work afterwards.
  if (CmpInst->getParent() != PhiBlock)
    return;

  // The compare must be against a constant so its result on each table entry
  // can be evaluated at compile time.
  Constant *CmpOp1 = dyn_cast<Constant>(Val: CmpInst->getOperand(i_nocapture: 1));
  if (!CmpOp1)
    return;

  Value *RangeCmp = RangeCheckBranch->getCondition();
  Constant *TrueConst = ConstantInt::getTrue(Ty: RangeCmp->getType());
  Constant *FalseConst = ConstantInt::getFalse(Ty: RangeCmp->getType());

  // Check if the compare with the default value is constant true or false.
  // OnlyIfReduced=true asks for a result only when the icmp folds to a
  // constant (otherwise null), so the checks below also reject non-folding.
  Constant *DefaultConst = ConstantExpr::getICmp(pred: CmpInst->getPredicate(),
                                                 LHS: DefaultValue, RHS: CmpOp1, OnlyIfReduced: true);
  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
    return;

  // Check if the compare with the case values is distinct from the default
  // compare result. Every case must fold to the *opposite* constant of the
  // default for the range check to carry the same information.
  for (auto ValuePair : Values) {
    Constant *CaseConst = ConstantExpr::getICmp(pred: CmpInst->getPredicate(),
                                                LHS: ValuePair.second, RHS: CmpOp1, OnlyIfReduced: true);
    if (!CaseConst || CaseConst == DefaultConst ||
        (CaseConst != TrueConst && CaseConst != FalseConst))
      return;
  }

  // Check if the branch instruction dominates the phi node. It's a simple
  // dominance check, but sufficient for our needs.
  // Although this check is invariant in the calling loops, it's better to do it
  // at this late stage. Practically we do it at most once for a switch.
  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
  for (BasicBlock *Pred : predecessors(BB: PhiBlock)) {
    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
      return;
  }

  if (DefaultConst == FalseConst) {
    // The compare yields the same result. We can replace it.
    CmpInst->replaceAllUsesWith(V: RangeCmp);
    ++NumTableCmpReuses;
  } else {
    // The compare yields the same result, just inverted. We can replace it.
    Value *InvertedTableCmp = BinaryOperator::CreateXor(
        V1: RangeCmp, V2: ConstantInt::get(Ty: RangeCmp->getType(), V: 1), Name: "inverted.cmp" ,
        It: RangeCheckBranch->getIterator());
    CmpInst->replaceAllUsesWith(V: InvertedTableCmp);
    ++NumTableCmpReuses;
  }
}
6625 | |
6626 | /// If the switch is only used to initialize one or more phi nodes in a common |
6627 | /// successor block with different constant values, replace the switch with |
6628 | /// lookup tables. |
static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
                               DomTreeUpdater *DTU, const DataLayout &DL,
                               const TargetTransformInfo &TTI) {
  assert(SI->getNumCases() > 1 && "Degenerate switch?" );

  BasicBlock *BB = SI->getParent();
  Function *Fn = BB->getParent();
  // Only build lookup table when we have a target that supports it or the
  // attribute is not set.
  if (!TTI.shouldBuildLookupTables() ||
      (Fn->getFnAttribute(Kind: "no-jump-tables" ).getValueAsBool()))
    return false;

  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
  // split off a dense part and build a lookup table for that.

  // FIXME: This creates arrays of GEPs to constant strings, which means each
  // GEP needs a runtime relocation in PIC code. We should just build one big
  // string and lookup indices into that.

  // Ignore switches with less than three cases. Lookup tables will not make
  // them faster, so we don't analyze them.
  if (SI->getNumCases() < 3)
    return false;

  // Figure out the corresponding result for each case value and phi node in the
  // common destination, as well as the min and max case values.
  assert(!SI->cases().empty());
  SwitchInst::CaseIt CI = SI->case_begin();
  ConstantInt *MinCaseVal = CI->getCaseValue();
  ConstantInt *MaxCaseVal = CI->getCaseValue();

  BasicBlock *CommonDest = nullptr;

  // Per-phi list of (case value, phi result) pairs, in case order.
  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
  SmallDenseMap<PHINode *, ResultListTy> ResultLists;

  SmallDenseMap<PHINode *, Constant *> DefaultResults;
  SmallDenseMap<PHINode *, Type *> ResultTypes;
  SmallVector<PHINode *, 4> PHIs;

  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
    ConstantInt *CaseVal = CI->getCaseValue();
    // Min/max use *signed* comparison; the table index is built relative to
    // the signed minimum below.
    if (CaseVal->getValue().slt(RHS: MinCaseVal->getValue()))
      MinCaseVal = CaseVal;
    if (CaseVal->getValue().sgt(RHS: MaxCaseVal->getValue()))
      MaxCaseVal = CaseVal;

    // Resulting value at phi nodes for this case value.
    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
    ResultsTy Results;
    if (!getCaseResults(SI, CaseVal, CaseDest: CI->getCaseSuccessor(), CommonDest: &CommonDest,
                        Res&: Results, DL, TTI))
      return false;

    // Append the result from this case to the list for each phi.
    for (const auto &I : Results) {
      PHINode *PHI = I.first;
      Constant *Value = I.second;
      // PHIs records first-seen order so later loops visit the phi nodes
      // deterministically (ResultLists is a DenseMap).
      if (!ResultLists.count(Val: PHI))
        PHIs.push_back(Elt: PHI);
      ResultLists[PHI].push_back(Elt: std::make_pair(x&: CaseVal, y&: Value));
    }
  }

  // Keep track of the result types.
  for (PHINode *PHI : PHIs) {
    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
  }

  uint64_t NumResults = ResultLists[PHIs[0]].size();

  // If the table has holes, we need a constant result for the default case
  // or a bitmask that fits in a register.
  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
  bool HasDefaultResults =
      getCaseResults(SI, CaseVal: nullptr, CaseDest: SI->getDefaultDest(), CommonDest: &CommonDest,
                     Res&: DefaultResultsList, DL, TTI);

  for (const auto &I : DefaultResultsList) {
    PHINode *PHI = I.first;
    Constant *Result = I.second;
    DefaultResults[PHI] = Result;
  }

  bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
      MinCaseVal&: *MinCaseVal, MaxCaseVal: *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
  uint64_t TableSize;
  if (UseSwitchConditionAsTableIndex)
    TableSize = MaxCaseVal->getLimitedValue() + 1;
  else
    TableSize =
        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;

  // A hole is a table slot with no corresponding switch case.
  bool TableHasHoles = (NumResults < TableSize);
  bool NeedMask = (TableHasHoles && !HasDefaultResults);
  if (NeedMask) {
    // As an extra penalty for the validity test we require more cases.
    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
      return false;
    // The hole-check bitmask has one bit per table slot and must fit in a
    // legal integer register.
    if (!DL.fitsInLegalInteger(Width: TableSize))
      return false;
  }

  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
    return false;

  std::vector<DominatorTree::UpdateType> Updates;

  // Compute the maximum table size representable by the integer type we are
  // switching upon.
  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
  assert(MaxTableSize >= TableSize &&
         "It is impossible for a switch to have more entries than the max "
         "representable value of its input integer type's size." );

  // If the default destination is unreachable, or if the lookup table covers
  // all values of the conditional variable, branch directly to the lookup table
  // BB. Otherwise, check that the condition is within the case range.
  bool DefaultIsReachable =
      !isa<UnreachableInst>(Val: SI->getDefaultDest()->getFirstNonPHIOrDbg());

  // Create the BB that does the lookups.
  Module &Mod = *CommonDest->getParent()->getParent();
  BasicBlock *LookupBB = BasicBlock::Create(
      Context&: Mod.getContext(), Name: "switch.lookup" , Parent: CommonDest->getParent(), InsertBefore: CommonDest);

  // Compute the table index value.
  Builder.SetInsertPoint(SI);
  Value *TableIndex;
  ConstantInt *TableIndexOffset;
  if (UseSwitchConditionAsTableIndex) {
    // The condition indexes the table directly; offset is zero.
    TableIndexOffset = ConstantInt::get(Ty: MaxCaseVal->getIntegerType(), V: 0);
    TableIndex = SI->getCondition();
  } else {
    TableIndexOffset = MinCaseVal;
    // If the default is unreachable, all case values are s>= MinCaseVal. Then
    // we can try to attach nsw.
    bool MayWrap = true;
    if (!DefaultIsReachable) {
      APInt Res = MaxCaseVal->getValue().ssub_ov(RHS: MinCaseVal->getValue(), Overflow&: MayWrap);
      (void)Res;
    }

    TableIndex = Builder.CreateSub(LHS: SI->getCondition(), RHS: TableIndexOffset,
                                   Name: "switch.tableidx" , /*HasNUW =*/false,
                                   /*HasNSW =*/!MayWrap);
  }

  BranchInst *RangeCheckBranch = nullptr;

  // Grow the table to cover all possible index values to avoid the range check.
  // It will use the default result to fill in the table hole later, so make
  // sure it exists.
  if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
    ConstantRange CR = computeConstantRange(V: TableIndex, /* ForSigned */ false);
    // Growing the table shouldn't have any size impact by checking
    // WouldFitInRegister.
    // TODO: Consider growing the table also when it doesn't fit in a register
    // if no optsize is specified.
    const uint64_t UpperBound = CR.getUpper().getLimitedValue();
    if (!CR.isUpperWrapped() && all_of(Range&: ResultTypes, P: [&](const auto &KV) {
          return SwitchLookupTable::WouldFitInRegister(
              DL, TableSize: UpperBound, ElementType: KV.second /* ResultType */);
        })) {
      // There may be some case index larger than the UpperBound (unreachable
      // case), so make sure the table size does not get smaller.
      TableSize = std::max(a: UpperBound, b: TableSize);
      // The default branch is unreachable after we enlarge the lookup table.
      // Adjust DefaultIsReachable to reuse code path.
      DefaultIsReachable = false;
    }
  }

  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    Builder.CreateBr(Dest: LookupBB);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
    // Note: We call removePredecessor later since we need to be able to get the
    // PHI value for the default case in case we're using a bit mask.
  } else {
    // Guard the lookup with an unsigned range check against the table size.
    Value *Cmp = Builder.CreateICmpULT(
        LHS: TableIndex, RHS: ConstantInt::get(Ty: MinCaseVal->getType(), V: TableSize));
    RangeCheckBranch =
        Builder.CreateCondBr(Cond: Cmp, True: LookupBB, False: SI->getDefaultDest());
    if (DTU)
      Updates.push_back(x: {DominatorTree::Insert, BB, LookupBB});
  }

  // Populate the BB that does the lookups.
  Builder.SetInsertPoint(LookupBB);

  if (NeedMask) {
    // Before doing the lookup, we do the hole check. The LookupBB is therefore
    // re-purposed to do the hole check, and we create a new LookupBB.
    BasicBlock *MaskBB = LookupBB;
    MaskBB->setName("switch.hole_check" );
    LookupBB = BasicBlock::Create(Context&: Mod.getContext(), Name: "switch.lookup" ,
                                  Parent: CommonDest->getParent(), InsertBefore: CommonDest);

    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
    // unnecessary illegal types.
    uint64_t TableSizePowOf2 = NextPowerOf2(A: std::max(a: 7ULL, b: TableSize - 1ULL));
    APInt MaskInt(TableSizePowOf2, 0);
    APInt One(TableSizePowOf2, 1);
    // Build bitmask; fill in a 1 bit for every case.
    const ResultListTy &ResultList = ResultLists[PHIs[0]];
    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
      // Bit position is the case value relative to the table's base offset.
      uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
                         .getLimitedValue();
      MaskInt |= One << Idx;
    }
    ConstantInt *TableMask = ConstantInt::get(Context&: Mod.getContext(), V: MaskInt);

    // Get the TableIndex'th bit of the bitmask.
    // If this bit is 0 (meaning hole) jump to the default destination,
    // else continue with table lookup.
    IntegerType *MapTy = TableMask->getIntegerType();
    Value *MaskIndex =
        Builder.CreateZExtOrTrunc(V: TableIndex, DestTy: MapTy, Name: "switch.maskindex" );
    Value *Shifted = Builder.CreateLShr(LHS: TableMask, RHS: MaskIndex, Name: "switch.shifted" );
    Value *LoBit = Builder.CreateTrunc(
        V: Shifted, DestTy: Type::getInt1Ty(C&: Mod.getContext()), Name: "switch.lobit" );
    Builder.CreateCondBr(Cond: LoBit, True: LookupBB, False: SI->getDefaultDest());
    if (DTU) {
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, LookupBB});
      Updates.push_back(x: {DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
    }
    Builder.SetInsertPoint(LookupBB);
    AddPredecessorToBlock(Succ: SI->getDefaultDest(), NewPred: MaskBB, ExistPred: BB);
  }

  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
    // do not delete PHINodes here.
    SI->getDefaultDest()->removePredecessor(Pred: BB,
                                            /*KeepOneInputPHIs=*/true);
    if (DTU)
      Updates.push_back(x: {DominatorTree::Delete, BB, SI->getDefaultDest()});
  }

  // Build one lookup table per phi and wire its result into the phi.
  for (PHINode *PHI : PHIs) {
    const ResultListTy &ResultList = ResultLists[PHI];

    // If using a bitmask, use any value to fill the lookup table holes.
    Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
    StringRef FuncName = Fn->getName();
    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
                            DL, FuncName);

    Value *Result = Table.BuildLookup(Index: TableIndex, Builder);

    // Do a small peephole optimization: re-use the switch table compare if
    // possible.
    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
      BasicBlock *PhiBlock = PHI->getParent();
      // Search for compare instructions which use the phi.
      for (auto *User : PHI->users()) {
        reuseTableCompare(PhiUser: User, PhiBlock, RangeCheckBranch, DefaultValue: DV, Values: ResultList);
      }
    }

    PHI->addIncoming(V: Result, BB: LookupBB);
  }

  Builder.CreateBr(Dest: CommonDest);
  if (DTU)
    Updates.push_back(x: {DominatorTree::Insert, LookupBB, CommonDest});

  // Remove the switch.
  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
    BasicBlock *Succ = SI->getSuccessor(idx: i);

    // The default destination's predecessor list was already adjusted above.
    if (Succ == SI->getDefaultDest())
      continue;
    Succ->removePredecessor(Pred: BB);
    if (DTU && RemovedSuccessors.insert(Ptr: Succ).second)
      Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
  }
  SI->eraseFromParent();

  if (DTU)
    DTU->applyUpdates(Updates);

  ++NumLookupTables;
  if (NeedMask)
    ++NumLookupTablesHoles;
  return true;
}
6921 | |
6922 | /// Try to transform a switch that has "holes" in it to a contiguous sequence |
6923 | /// of cases. |
6924 | /// |
6925 | /// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be |
6926 | /// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}. |
6927 | /// |
6928 | /// This converts a sparse switch into a dense switch which allows better |
6929 | /// lowering and could also allow transforming into a lookup table. |
static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
                              const DataLayout &DL,
                              const TargetTransformInfo &TTI) {
  // Only handle conditions that fit in 64 bits and are legal for the target;
  // the transform below manipulates the case values as plain (u)int64_t.
  auto *CondTy = cast<IntegerType>(Val: SI->getCondition()->getType());
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;
  // Only bother with this optimization if there are more than 3 switch cases;
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // This transform is agnostic to the signedness of the input or case values. We
  // can treat the case values as signed or unsigned. We can optimize more common
  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
  // as signed.
  SmallVector<int64_t,4> Values;
  for (const auto &C : SI->cases())
    Values.push_back(Elt: C.getCaseValue()->getValue().getSExtValue());
  llvm::sort(C&: Values);

  // If the switch is already dense, there's nothing useful to do here.
  if (isSwitchDense(Values))
    return false;

  // First, transform the values such that they start at zero and ascend.
  int64_t Base = Values[0];
  for (auto &V : Values)
    // Subtract in unsigned arithmetic to avoid UB from signed overflow.
    V -= (uint64_t)(Base);

  // Now we have signed numbers that have been shifted so that, given enough
  // precision, there are no negative values. Since the rest of the transform
  // is bitwise only, we switch now to an unsigned representation.

  // This transform can be done speculatively because it is so cheap - it
  // results in a single rotate operation being inserted.

  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
  // less than 64.
  unsigned Shift = 64;
  for (auto &V : Values)
    Shift = std::min(a: Shift, b: (unsigned)llvm::countr_zero(Val: (uint64_t)V));
  assert(Shift < 64);
  if (Shift > 0)
    // Divide every value by the common power-of-2 factor.
    for (auto &V : Values)
      V = (int64_t)((uint64_t)V >> Shift);

  if (!isSwitchDense(Values))
    // Transform didn't create a dense switch.
    return false;

  // The obvious transform is to shift the switch condition right and emit a
  // check that the condition actually cleanly divided by GCD, i.e.
  // C & (1 << Shift - 1) == 0
  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
  //
  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
  // shift and puts the shifted-off bits in the uppermost bits. If any of these
  // are nonzero then the switch condition will be very large and will hit the
  // default case.

  auto *Ty = cast<IntegerType>(Val: SI->getCondition()->getType());
  Builder.SetInsertPoint(SI);
  Value *Sub =
      Builder.CreateSub(LHS: SI->getCondition(), RHS: ConstantInt::get(Ty, V: Base));
  // ROTR(x, Shift) expressed as fshl(x, x, BitWidth - Shift).
  Value *Rot = Builder.CreateIntrinsic(
      Ty, Intrinsic::fshl,
      {Sub, Sub, ConstantInt::get(Ty, V: Ty->getBitWidth() - Shift)});
  SI->replaceUsesOfWith(From: SI->getCondition(), To: Rot);

  // Rewrite each case value to match the rebased, shifted condition.
  for (auto Case : SI->cases()) {
    auto *Orig = Case.getCaseValue();
    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
    Case.setValue(cast<ConstantInt>(Val: ConstantInt::get(Ty, V: Sub.lshr(shiftAmt: Shift))));
  }
  return true;
}
7008 | |
7009 | /// Tries to transform switch of powers of two to reduce switch range. |
7010 | /// For example, switch like: |
7011 | /// switch (C) { case 1: case 2: case 64: case 128: } |
7012 | /// will be transformed to: |
7013 | /// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: } |
7014 | /// |
7015 | /// This transformation allows better lowering and could allow transforming into |
7016 | /// a lookup table. |
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
                                        const DataLayout &DL,
                                        const TargetTransformInfo &TTI) {
  Value *Condition = SI->getCondition();
  LLVMContext &Context = SI->getContext();
  auto *CondTy = cast<IntegerType>(Val: Condition->getType());

  // Only handle conditions that fit in 64 bits and are legal for the target.
  if (CondTy->getIntegerBitWidth() > 64 ||
      !DL.fitsInLegalInteger(Width: CondTy->getIntegerBitWidth()))
    return false;

  const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
                              {Condition, ConstantInt::getTrue(Context)}),
      TTI::TCK_SizeAndLatency);

  if (CttzIntrinsicCost > TTI::TCC_Basic)
    // Inserting intrinsic is too expensive.
    return false;

  // Only bother with this optimization if there are more than 3 switch cases.
  // SDAG will only bother creating jump tables for 4 or more cases.
  if (SI->getNumCases() < 4)
    return false;

  // We perform this optimization only for switches with
  // unreachable default case.
  // This assumption will save us from checking if `Condition` is a power of two.
  if (!isa<UnreachableInst>(Val: SI->getDefaultDest()->getFirstNonPHIOrDbg()))
    return false;

  // Check that switch cases are powers of two.
  SmallVector<uint64_t, 4> Values;
  for (const auto &Case : SI->cases()) {
    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
    if (llvm::has_single_bit(Value: CaseValue))
      Values.push_back(Elt: CaseValue);
    else
      return false;
  }

  // isSwitchDense requires case values to be sorted.
  llvm::sort(C&: Values);
  // After cttz the cases become bit positions; their range is the span of
  // trailing-zero counts between the smallest and largest power of two.
  if (!isSwitchDense(NumCases: Values.size(), CaseRange: llvm::countr_zero(Val: Values.back()) -
                                        llvm::countr_zero(Val: Values.front()) + 1))
    // Transform is unable to generate dense switch.
    return false;

  Builder.SetInsertPoint(SI);

  // Replace each case with its trailing zeros number.
  for (auto &Case : SI->cases()) {
    auto *OrigValue = Case.getCaseValue();
    Case.setValue(ConstantInt::get(Ty: OrigValue->getIntegerType(),
                                   V: OrigValue->getValue().countr_zero()));
  }

  // Replace condition with its trailing zeros number.
  // The `true` argument marks zero as poison, which is fine: zero can only
  // reach the (unreachable) default destination.
  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});

  SI->setCondition(ConditionTrailingZeros);

  return true;
}
7082 | |
/// Driver for switch simplifications: tries each transform in a deliberate
/// order and requests re-simplification as soon as one of them fires.
bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
  BasicBlock *BB = SI->getParent();

  if (isValueEqualityComparison(TI: SI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(TI: SI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // A switch on a select can be split into switches on the select arms.
    Value *Cond = SI->getCondition();
    if (SelectInst *Select = dyn_cast<SelectInst>(Val: Cond))
      if (SimplifySwitchOnSelect(SI, Select))
        return requestResimplify();

    // If the block only contains the switch, see if we can fold the block
    // away into any preds.
    if (SI == &*BB->instructionsWithoutDebug(SkipPseudoOp: false).begin())
      if (FoldValueComparisonIntoPredecessors(TI: SI, Builder))
        return requestResimplify();
  }

  // Try to transform the switch into an icmp and a branch.
  // The conversion from switch to comparison may lose information on
  // impossible switch values, so disable it early in the pipeline.
  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
    return requestResimplify();

  // Remove unreachable cases.
  if (eliminateDeadSwitchCases(SI, DTU, AC: Options.AC, DL))
    return requestResimplify();

  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
    return requestResimplify();

  // The conversion from switch to lookup tables results in difficult-to-analyze
  // code and makes pruning branches much harder. This is a problem if the
  // switch expression itself can still be restricted as a result of inlining or
  // CVP. Therefore, only apply this transformation during late stages of the
  // optimisation pipeline.
  if (Options.ConvertSwitchToLookupTable &&
      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
    return requestResimplify();

  // Range-reduction transforms run after the lookup-table conversion so they
  // don't obscure a switch that could have become a table directly.
  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
    return requestResimplify();

  if (ReduceSwitchRange(SI, Builder, DL, TTI))
    return requestResimplify();

  if (HoistCommon &&
      hoistCommonCodeFromSuccessors(BB: SI->getParent(), EqTermsOnly: !Options.HoistCommonInsts))
    return requestResimplify();

  return false;
}
7142 | |
/// Simplify an indirectbr: drop redundant destinations, then degenerate the
/// terminator when zero or one destination remains, and finally try to fold
/// an indirectbr on a select.
bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
  BasicBlock *BB = IBI->getParent();
  bool Changed = false;

  // Eliminate redundant destinations.
  // A destination is redundant when it appears more than once in the list, or
  // when its block no longer has its address taken (an indirectbr can never
  // actually branch there).
  SmallPtrSet<Value *, 8> Succs;
  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
    BasicBlock *Dest = IBI->getDestination(i);
    if (!Dest->hasAddressTaken() || !Succs.insert(Ptr: Dest).second) {
      // Only blocks losing their last edge go into RemovedSuccs; a duplicate
      // destination keeps its CFG edge alive via the first occurrence.
      if (!Dest->hasAddressTaken())
        RemovedSuccs.insert(X: Dest);
      Dest->removePredecessor(Pred: BB);
      IBI->removeDestination(i);
      // Compensate for the removal so the next destination isn't skipped.
      --i;
      --e;
      Changed = true;
    }
  }

  if (DTU) {
    std::vector<DominatorTree::UpdateType> Updates;
    Updates.reserve(n: RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back(x: {DominatorTree::Delete, BB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }

  if (IBI->getNumDestinations() == 0) {
    // If the indirectbr has no successors, change it to unreachable.
    new UnreachableInst(IBI->getContext(), IBI->getIterator());
    EraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  if (IBI->getNumDestinations() == 1) {
    // If the indirectbr has one successor, change it to a direct branch.
    BranchInst::Create(IfTrue: IBI->getDestination(i: 0), InsertBefore: IBI->getIterator());
    EraseTerminatorAndDCECond(TI: IBI);
    return true;
  }

  if (SelectInst *SI = dyn_cast<SelectInst>(Val: IBI->getAddress())) {
    if (SimplifyIndirectBrOnSelect(IBI, SI))
      return requestResimplify();
  }
  return Changed;
}
7191 | |
/// Given a block with only a single landing pad and an unconditional branch
/// try to find another basic block which this one can be merged with.  This
/// handles cases where we have multiple invokes with unique landing pads, but
/// a shared handler.
///
/// We specifically choose to not worry about merging non-empty blocks
/// here.  That is a PRE/scheduling problem and is best solved elsewhere.  In
/// practice, the optimizer produces empty landing pad blocks quite frequently
/// when dealing with exception dense code.  (see: instcombine, gvn, if-else
/// sinking in this file)
///
/// This is primarily a code size optimization.  We need to avoid performing
/// any transform which might inhibit optimization (such as our ability to
/// specialize a particular handler via tail commoning).  We do this by not
/// merging any blocks which require us to introduce a phi.  Since the same
/// values are flowing through both blocks, we don't lose any ability to
/// specialize.  If anything, we make such specialization more likely.
///
/// TODO - This transformation could remove entries from a phi in the target
/// block when the inputs in the phi are the same for the two blocks being
/// merged.  In some cases, this could result in removal of the PHI entirely.
static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
                                 BasicBlock *BB, DomTreeUpdater *DTU) {
  // BB is known to contain only LPad + (debug intrinsics) + BI.
  auto Succ = BB->getUniqueSuccessor();
  assert(Succ);
  // If there's a phi in the successor block, we'd likely have to introduce
  // a phi into the merged landing pad block.
  if (isa<PHINode>(Val: *Succ->begin()))
    return false;

  // Scan the other predecessors of Succ for a block that is structurally
  // identical to BB: same landing pad, then (ignoring debug intrinsics) the
  // same unconditional branch.
  for (BasicBlock *OtherPred : predecessors(BB: Succ)) {
    if (BB == OtherPred)
      continue;
    BasicBlock::iterator I = OtherPred->begin();
    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(Val&: I);
    if (!LPad2 || !LPad2->isIdenticalTo(I: LPad))
      continue;
    // Skip over debug intrinsics between the landing pad and the terminator.
    for (++I; isa<DbgInfoIntrinsic>(Val: I); ++I)
      ;
    BranchInst *BI2 = dyn_cast<BranchInst>(Val&: I);
    if (!BI2 || !BI2->isIdenticalTo(I: BI))
      continue;

    std::vector<DominatorTree::UpdateType> Updates;

    // We've found an identical block.  Update our predecessors to take that
    // path instead and make ourselves dead.
    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
    for (BasicBlock *Pred : UniquePreds) {
      // Every predecessor of a landing pad block must be an invoke that
      // unwinds to it; retarget that unwind edge at the identical block.
      InvokeInst *II = cast<InvokeInst>(Val: Pred->getTerminator());
      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
             "unexpected successor" );
      II->setUnwindDest(OtherPred);
      if (DTU) {
        Updates.push_back(x: {DominatorTree::Insert, Pred, OtherPred});
        Updates.push_back(x: {DominatorTree::Delete, Pred, BB});
      }
    }

    // The debug info in OtherPred doesn't cover the merged control flow that
    // used to go through BB.  We need to delete it or update it.
    for (Instruction &Inst : llvm::make_early_inc_range(Range&: *OtherPred))
      if (isa<DbgInfoIntrinsic>(Val: Inst))
        Inst.eraseFromParent();

    // Detach BB from its successors; it is now unreachable and will be
    // cleaned up by a later pass over the function.
    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
    for (BasicBlock *Succ : UniqueSuccs) {
      Succ->removePredecessor(Pred: BB);
      if (DTU)
        Updates.push_back(x: {DominatorTree::Delete, BB, Succ});
    }

    // Replace BB's terminator with unreachable to mark it dead.
    IRBuilder<> Builder(BI);
    Builder.CreateUnreachable();
    BI->eraseFromParent();
    if (DTU)
      DTU->applyUpdates(Updates);
    return true;
  }
  return false;
}
7273 | |
7274 | bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) { |
7275 | return Branch->isUnconditional() ? simplifyUncondBranch(BI: Branch, Builder) |
7276 | : simplifyCondBranch(BI: Branch, Builder); |
7277 | } |
7278 | |
/// Simplify a basic block that ends in an unconditional branch.  Tries, in
/// order: removing the block entirely when it is empty, folding a trailing
/// equality icmp into a predecessor switch/branch, merging an empty landing
/// pad with an identical one, and folding a compare+branch into a
/// predecessor's branch condition.
bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
                                          IRBuilder<> &Builder) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *Succ = BI->getSuccessor(i: 0);

  // If the Terminator is the only non-phi instruction, simplify the block.
  // If LoopHeader is provided, check if the block or its successor is a loop
  // header. (This is for early invocations before loop simplify and
  // vectorization to keep canonical loop forms for nested loops. These blocks
  // can be eliminated when the pass is invoked later in the back-end.)
  // Note that if BB has only one predecessor then we do not introduce new
  // backedge, so we can eliminate BB.
  bool NeedCanonicalLoop =
      Options.NeedCanonicalLoop &&
      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(N: 2) &&
       (is_contained(Range&: LoopHeaders, Element: BB) || is_contained(Range&: LoopHeaders, Element: Succ)));
  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(SkipPseudoOp: true)->getIterator();
  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
    return true;

  // If the only instruction in the block is a seteq/setne comparison against a
  // constant, try to simplify the block.
  if (ICmpInst *ICI = dyn_cast<ICmpInst>(Val&: I))
    if (ICI->isEquality() && isa<ConstantInt>(Val: ICI->getOperand(i_nocapture: 1))) {
      // Skip debug intrinsics between the icmp and the terminator.
      for (++I; isa<DbgInfoIntrinsic>(Val: I); ++I)
        ;
      if (I->isTerminator() &&
          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
        return true;
    }

  // See if we can merge an empty landing pad block with another which is
  // equivalent.
  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(Val&: I)) {
    // Skip debug intrinsics; the block must otherwise contain only the
    // landing pad and the branch for the merge to apply.
    for (++I; isa<DbgInfoIntrinsic>(Val: I); ++I)
      ;
    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
      return true;
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and our successor, fold the comparison into the
  // predecessor and use logical operations to update the incoming value
  // for PHI nodes in common successor.
  if (Options.SpeculateBlocks &&
      FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
                             BonusInstThreshold: Options.BonusInstThreshold))
    return requestResimplify();
  return false;
}
7330 | |
7331 | static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) { |
7332 | BasicBlock *PredPred = nullptr; |
7333 | for (auto *P : predecessors(BB)) { |
7334 | BasicBlock *PPred = P->getSinglePredecessor(); |
7335 | if (!PPred || (PredPred && PredPred != PPred)) |
7336 | return nullptr; |
7337 | PredPred = PPred; |
7338 | } |
7339 | return PredPred; |
7340 | } |
7341 | |
/// Simplify a basic block that ends in a conditional branch.  Applies, in
/// order: value-equality folding against predecessors, icmp-chain to switch
/// conversion, constant-folding via dominating conditions, folding into a
/// predecessor branch, hoisting/speculating common successor code, jump
/// threading on values known in predecessors, condbr-into-condbr folding,
/// and conditional-store merging for diamond patterns.
bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
  assert(
      !isa<ConstantInt>(BI->getCondition()) &&
      BI->getSuccessor(0) != BI->getSuccessor(1) &&
      "Tautological conditional branch should have been eliminated already." );

  BasicBlock *BB = BI->getParent();
  if (!Options.SimplifyCondBranch ||
      BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
    return false;

  // Conditional branch
  if (isValueEqualityComparison(TI: BI)) {
    // If we only have one predecessor, and if it is a branch on this value,
    // see if that predecessor totally determines the outcome of this
    // switch.
    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
      if (SimplifyEqualityComparisonWithOnlyPredecessor(TI: BI, Pred: OnlyPred, Builder))
        return requestResimplify();

    // This block must be empty, except for the setcond inst, if it exists.
    // Ignore dbg and pseudo intrinsics.
    auto I = BB->instructionsWithoutDebug(SkipPseudoOp: true).begin();
    if (&*I == BI) {
      // The branch is the only real instruction: fold directly.
      if (FoldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    } else if (&*I == cast<Instruction>(Val: BI->getCondition())) {
      // The only other instruction is the compare feeding the branch.
      ++I;
      if (&*I == BI && FoldValueComparisonIntoPredecessors(TI: BI, Builder))
        return requestResimplify();
    }
  }

  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
  if (SimplifyBranchOnICmpChain(BI, Builder, DL))
    return true;

  // If this basic block has dominating predecessor blocks and the dominating
  // blocks' conditions imply BI's condition, we know the direction of BI.
  std::optional<bool> Imp = isImpliedByDomCondition(Cond: BI->getCondition(), ContextI: BI, DL);
  if (Imp) {
    // Turn this into a branch on constant.
    auto *OldCond = BI->getCondition();
    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(Context&: BB->getContext())
                             : ConstantInt::getFalse(Context&: BB->getContext());
    BI->setCondition(TorF);
    RecursivelyDeleteTriviallyDeadInstructions(V: OldCond);
    return requestResimplify();
  }

  // If this basic block is ONLY a compare and a branch, and if a predecessor
  // branches to us and one of our successors, fold the comparison into the
  // predecessor and use logical operations to pick the right destination.
  if (Options.SpeculateBlocks &&
      FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, TTI: &TTI,
                             BonusInstThreshold: Options.BonusInstThreshold))
    return requestResimplify();

  // We have a conditional branch to two blocks that are only reachable
  // from BI.  We know that the condbr dominates the two blocks, so see if
  // there is any identical code in the "then" and "else" blocks.  If so, we
  // can hoist it up to the branching block.
  if (BI->getSuccessor(i: 0)->getSinglePredecessor()) {
    if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
      // Both arms are exclusive to this branch: try hoisting common code.
      if (HoistCommon && hoistCommonCodeFromSuccessors(
                             BB: BI->getParent(), EqTermsOnly: !Options.HoistCommonInsts))
        return requestResimplify();
    } else {
      // If Successor #1 has multiple preds, we may be able to conditionally
      // execute Successor #0 if it branches to Successor #1.
      Instruction *Succ0TI = BI->getSuccessor(i: 0)->getTerminator();
      if (Succ0TI->getNumSuccessors() == 1 &&
          Succ0TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 1))
        if (SpeculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 0)))
          return requestResimplify();
    }
  } else if (BI->getSuccessor(i: 1)->getSinglePredecessor()) {
    // If Successor #0 has multiple preds, we may be able to conditionally
    // execute Successor #1 if it branches to Successor #0.
    Instruction *Succ1TI = BI->getSuccessor(i: 1)->getTerminator();
    if (Succ1TI->getNumSuccessors() == 1 &&
        Succ1TI->getSuccessor(Idx: 0) == BI->getSuccessor(i: 0))
      if (SpeculativelyExecuteBB(BI, ThenBB: BI->getSuccessor(i: 1)))
        return requestResimplify();
  }

  // If this is a branch on something for which we know the constant value in
  // predecessors (e.g. a phi node in the current block), thread control
  // through this block.
  if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, AC: Options.AC))
    return requestResimplify();

  // Scan predecessor blocks for conditional branches.
  for (BasicBlock *Pred : predecessors(BB))
    if (BranchInst *PBI = dyn_cast<BranchInst>(Val: Pred->getTerminator()))
      if (PBI != BI && PBI->isConditional())
        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
          return requestResimplify();

  // Look for diamond patterns.
  if (MergeCondStores)
    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
      if (BranchInst *PBI = dyn_cast<BranchInst>(Val: PrevBB->getTerminator()))
        if (PBI != BI && PBI->isConditional())
          if (mergeConditionalStores(PBI, QBI: BI, DTU, DL, TTI))
            return requestResimplify();

  return false;
}
7451 | |
/// Check if passing a value to an instruction will cause undefined behavior.
/// \p V must be a constant (null or undef) flowing into \p I's first user.
/// \p PtrValueMayBeModified becomes true when a GEP along the use chain may
/// have changed the pointer value, weakening null-based conclusions for
/// nonnull-attribute cases.  Only the first use of \p I is inspected, to
/// bound compile time.
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
  Constant *C = dyn_cast<Constant>(Val: V);
  if (!C)
    return false;

  if (I->use_empty())
    return false;

  if (C->isNullValue() || isa<UndefValue>(Val: C)) {
    // Only look at the first use, avoid hurting compile time with long uselists
    auto *Use = cast<Instruction>(Val: *I->user_begin());
    // Bail out if Use is not in the same BB as I or Use == I or Use comes
    // before I in the block. The latter two can be the case if Use is a PHI
    // node.
    if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(Other: I))
      return false;

    // Now make sure that there are no instructions in between that can alter
    // control flow (eg. calls)
    auto InstrRange =
        make_range(x: std::next(x: I->getIterator()), y: Use->getIterator());
    if (any_of(Range&: InstrRange, P: [](Instruction &I) {
          return !isGuaranteedToTransferExecutionToSuccessor(I: &I);
        }))
      return false;

    // Look through GEPs. A load from a GEP derived from NULL is still undefined
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Val: Use))
      if (GEP->getPointerOperand() == I) {
        // The current base address is null, there are four cases to consider:
        // getelementptr (TY, null, 0)                 -> null
        // getelementptr (TY, null, not zero)          -> may be modified
        // getelementptr inbounds (TY, null, 0)        -> null
        // getelementptr inbounds (TY, null, not zero) -> poison iff null is
        // undefined?
        if (!GEP->hasAllZeroIndices() &&
            (!GEP->isInBounds() ||
             NullPointerIsDefined(F: GEP->getFunction(),
                                  AS: GEP->getPointerAddressSpace())))
          PtrValueMayBeModified = true;
        // Recurse through the GEP's own (single-use) chain.
        return passingValueIsAlwaysUndefined(V, I: GEP, PtrValueMayBeModified);
      }

    // Look through return.
    if (ReturnInst *Ret = dyn_cast<ReturnInst>(Val: Use)) {
      bool HasNoUndefAttr =
          Ret->getFunction()->hasRetAttribute(Attribute::Kind: NoUndef);
      // Return undefined to a noundef return value is undefined.
      if (isa<UndefValue>(Val: C) && HasNoUndefAttr)
        return true;
      // Return null to a nonnull+noundef return value is undefined.
      if (C->isNullValue() && HasNoUndefAttr &&
          Ret->getFunction()->hasRetAttribute(Attribute::Kind: NonNull)) {
        // Only a conclusion if the pointer provably still equals null here.
        return !PtrValueMayBeModified;
      }
    }

    // Look through bitcasts.
    if (BitCastInst *BC = dyn_cast<BitCastInst>(Val: Use))
      return passingValueIsAlwaysUndefined(V, I: BC, PtrValueMayBeModified);

    // Load from null is undefined.
    if (LoadInst *LI = dyn_cast<LoadInst>(Val: Use))
      if (!LI->isVolatile())
        return !NullPointerIsDefined(F: LI->getFunction(),
                                     AS: LI->getPointerAddressSpace());

    // Store to null is undefined.
    if (StoreInst *SI = dyn_cast<StoreInst>(Val: Use))
      if (!SI->isVolatile())
        return (!NullPointerIsDefined(F: SI->getFunction(),
                                      AS: SI->getPointerAddressSpace())) &&
               SI->getPointerOperand() == I;

    if (auto *CB = dyn_cast<CallBase>(Val: Use)) {
      // In address spaces where null is a valid address, calls through or
      // passing null are not UB.
      if (C->isNullValue() && NullPointerIsDefined(F: CB->getFunction()))
        return false;
      // A call to null is undefined.
      if (CB->getCalledOperand() == I)
        return true;

      if (C->isNullValue()) {
        for (const llvm::Use &Arg : CB->args())
          if (Arg == I) {
            unsigned ArgIdx = CB->getArgOperandNo(U: &Arg);
            if (CB->isPassingUndefUB(ArgNo: ArgIdx) &&
                CB->paramHasAttr(ArgNo: ArgIdx, Attribute::Kind: NonNull)) {
              // Passing null to a nonnnull+noundef argument is undefined.
              return !PtrValueMayBeModified;
            }
          }
      } else if (isa<UndefValue>(Val: C)) {
        // Passing undef to a noundef argument is undefined.
        for (const llvm::Use &Arg : CB->args())
          if (Arg == I) {
            unsigned ArgIdx = CB->getArgOperandNo(U: &Arg);
            if (CB->isPassingUndefUB(ArgNo: ArgIdx)) {
              // Passing undef to a noundef argument is undefined.
              return true;
            }
          }
      }
    }
  }
  return false;
}
7559 | |
/// If BB has an incoming value that will always trigger undefined behavior
/// (eg. null pointer dereference), remove the branch leading here.
/// For conditional branches the guarding condition is preserved as an
/// llvm.assume; for switches the offending edges are redirected to a fresh
/// unreachable block.  Returns true if any predecessor edge was removed.
static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
                                              DomTreeUpdater *DTU,
                                              AssumptionCache *AC) {
  for (PHINode &PHI : BB->phis())
    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
      if (passingValueIsAlwaysUndefined(V: PHI.getIncomingValue(i), I: &PHI)) {
        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
        Instruction *T = Predecessor->getTerminator();
        IRBuilder<> Builder(T);
        if (BranchInst *BI = dyn_cast<BranchInst>(Val: T)) {
          BB->removePredecessor(Pred: Predecessor);
          // Turn unconditional branches into unreachables and remove the dead
          // destination from conditional branches.
          if (BI->isUnconditional())
            Builder.CreateUnreachable();
          else {
            // Preserve guarding condition in assume, because it might not be
            // inferrable from any dominating condition.
            Value *Cond = BI->getCondition();
            CallInst *Assumption;
            if (BI->getSuccessor(i: 0) == BB)
              Assumption = Builder.CreateAssumption(Cond: Builder.CreateNot(V: Cond));
            else
              Assumption = Builder.CreateAssumption(Cond);
            if (AC)
              AC->registerAssumption(CI: cast<AssumeInst>(Val: Assumption));
            // Fall through unconditionally to the surviving successor.
            Builder.CreateBr(Dest: BI->getSuccessor(i: 0) == BB ? BI->getSuccessor(i: 1)
                                                 : BI->getSuccessor(i: 0));
          }
          BI->eraseFromParent();
          if (DTU)
            DTU->applyUpdates(Updates: {{DominatorTree::Delete, Predecessor, BB}});
          return true;
        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(Val: T)) {
          // Redirect all branches leading to UB into
          // a newly created unreachable block.
          BasicBlock *Unreachable = BasicBlock::Create(
              Context&: Predecessor->getContext(), Name: "unreachable" , Parent: BB->getParent(), InsertBefore: BB);
          Builder.SetInsertPoint(Unreachable);
          // The new block contains only one instruction: Unreachable
          Builder.CreateUnreachable();
          // Retarget every case (and the default) that branched to BB.
          for (const auto &Case : SI->cases())
            if (Case.getCaseSuccessor() == BB) {
              BB->removePredecessor(Pred: Predecessor);
              Case.setSuccessor(Unreachable);
            }
          if (SI->getDefaultDest() == BB) {
            BB->removePredecessor(Pred: Predecessor);
            SI->setDefaultDest(Unreachable);
          }

          if (DTU)
            DTU->applyUpdates(
                Updates: { { DominatorTree::Insert, Predecessor, Unreachable },
                  { DominatorTree::Delete, Predecessor, BB } });
          return true;
        }
      }

  return false;
}
7623 | |
/// Run one round of CFG simplification on \p BB: dead-block removal,
/// terminator constant folding, duplicate-PHI elimination, UB-predecessor
/// removal, block merging, sinking/invoke merging, two-entry PHI folding,
/// and finally a terminator-specific simplification dispatch.
bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
  bool Changed = false;

  assert(BB && BB->getParent() && "Block not embedded in function!" );
  assert(BB->getTerminator() && "Degenerate basic block encountered!" );

  // Remove basic blocks that have no predecessors (except the entry block)...
  // or that just have themself as a predecessor.  These are unreachable.
  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
      BB->getSinglePredecessor() == BB) {
    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  // Check to see if we can constant propagate this terminator instruction
  // away...
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
                                    /*TLI=*/nullptr, DTU);

  // Check for and eliminate duplicate PHI nodes in this block.
  Changed |= EliminateDuplicatePHINodes(BB);

  // Check for and remove branches that will always cause undefined behavior.
  if (removeUndefIntroducingPredecessor(BB, DTU, AC: Options.AC))
    return requestResimplify();

  // Merge basic blocks into their predecessor if there is only one distinct
  // pred, and if there is only one distinct successor of the predecessor, and
  // if there are no PHI nodes.
  if (MergeBlockIntoPredecessor(BB, DTU))
    return true;

  if (SinkCommon && Options.SinkCommonInsts)
    if (SinkCommonCodeFromPredecessors(BB, DTU) ||
        MergeCompatibleInvokes(BB, DTU)) {
      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
      // so we may now have duplicate PHI's.
      // Let's rerun EliminateDuplicatePHINodes() first,
      // before FoldTwoEntryPHINode() potentially converts them into select's,
      // after which we'd need a whole EarlyCSE pass run to cleanup them.
      return true;
    }

  IRBuilder<> Builder(BB);

  if (Options.SpeculateBlocks &&
      !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
    // If there is a trivial two-entry PHI node in this basic block, and we can
    // eliminate it, do so now.
    if (auto *PN = dyn_cast<PHINode>(Val: BB->begin()))
      if (PN->getNumIncomingValues() == 2)
        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
          return true;
  }

  // Dispatch on the terminator kind for opcode-specific simplifications.
  Instruction *Terminator = BB->getTerminator();
  Builder.SetInsertPoint(Terminator);
  switch (Terminator->getOpcode()) {
  case Instruction::Br:
    Changed |= simplifyBranch(Branch: cast<BranchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Resume:
    Changed |= simplifyResume(RI: cast<ResumeInst>(Val: Terminator), Builder);
    break;
  case Instruction::CleanupRet:
    Changed |= simplifyCleanupReturn(RI: cast<CleanupReturnInst>(Val: Terminator));
    break;
  case Instruction::Switch:
    Changed |= simplifySwitch(SI: cast<SwitchInst>(Val: Terminator), Builder);
    break;
  case Instruction::Unreachable:
    Changed |= simplifyUnreachable(UI: cast<UnreachableInst>(Val: Terminator));
    break;
  case Instruction::IndirectBr:
    Changed |= simplifyIndirectBr(IBI: cast<IndirectBrInst>(Val: Terminator));
    break;
  }

  return Changed;
}
7705 | |
7706 | bool SimplifyCFGOpt::run(BasicBlock *BB) { |
7707 | bool Changed = false; |
7708 | |
7709 | // Repeated simplify BB as long as resimplification is requested. |
7710 | do { |
7711 | Resimplify = false; |
7712 | |
7713 | // Perform one round of simplifcation. Resimplify flag will be set if |
7714 | // another iteration is requested. |
7715 | Changed |= simplifyOnce(BB); |
7716 | } while (Resimplify); |
7717 | |
7718 | return Changed; |
7719 | } |
7720 | |
7721 | bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, |
7722 | DomTreeUpdater *DTU, const SimplifyCFGOptions &Options, |
7723 | ArrayRef<WeakVH> ) { |
7724 | return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders, |
7725 | Options) |
7726 | .run(BB); |
7727 | } |
7728 | |