//===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains implementations for different VPlan recipes.
///
//===----------------------------------------------------------------------===//

#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <cassert>

using namespace llvm;

using VectorParts = SmallVector<Value *, 2>;

namespace llvm {
extern cl::opt<bool> EnableVPlanNativePath;
}

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

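// Note: the memory-effect queries below intentionally fall back to the
// conservative answer (true) for any recipe kind without an explicit case.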
bool VPRecipeBase::mayWriteToMemory() const {
  switch (getVPDefID()) {
  case VPInterleaveSC:
    return cast<VPInterleaveRecipe>(this)->getNumStoreOperands() > 0;
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayWriteToMemory();
  case VPBranchOnMaskSC:
  case VPScalarIVStepsSC:
  case VPPredInstPHISC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayWriteToMemory()) &&
           "underlying instruction may write to memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayReadFromMemory() const {
  switch (getVPDefID()) {
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
    return true;
  case VPReplicateSC:
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayReadFromMemory();
  case VPBranchOnMaskSC:
  case VPPredInstPHISC:
  case VPScalarIVStepsSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    return false;
  case VPBlendSC:
  case VPReductionSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayReadFromMemory()) &&
           "underlying instruction may read from memory");
    return false;
  }
  default:
    return true;
  }
}

bool VPRecipeBase::mayHaveSideEffects() const {
  switch (getVPDefID()) {
  case VPDerivedIVSC:
  case VPPredInstPHISC:
  case VPScalarCastSC:
    return false;
  case VPInstructionSC:
    switch (cast<VPInstruction>(this)->getOpcode()) {
    case Instruction::Or:
    case Instruction::ICmp:
    case Instruction::Select:
    case VPInstruction::Not:
    case VPInstruction::CalculateTripCountMinusVF:
    case VPInstruction::CanonicalIVIncrementForPart:
    case VPInstruction::PtrAdd:
      return false;
    default:
      return true;
    }
  case VPWidenCallSC:
    return cast<Instruction>(getVPSingleValue()->getUnderlyingValue())
        ->mayHaveSideEffects();
  case VPBlendSC:
  case VPReductionSC:
  case VPScalarIVStepsSC:
  case VPWidenCanonicalIVSC:
  case VPWidenCastSC:
  case VPWidenGEPSC:
  case VPWidenIntOrFpInductionSC:
  case VPWidenPHISC:
  case VPWidenPointerInductionSC:
  case VPWidenSC:
  case VPWidenSelectSC: {
    const Instruction *I =
        dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
    (void)I;
    assert((!I || !I->mayHaveSideEffects()) &&
           "underlying instruction has side-effects");
    return false;
  }
  case VPInterleaveSC:
    return mayWriteToMemory();
  case VPWidenLoadEVLSC:
  case VPWidenLoadSC:
  case VPWidenStoreEVLSC:
  case VPWidenStoreSC:
    assert(
        cast<VPWidenMemoryRecipe>(this)->getIngredient().mayHaveSideEffects() ==
            mayWriteToMemory() &&
        "mayHaveSideEffects result for ingredient differs from this "
        "implementation");
    return mayWriteToMemory();
  case VPReplicateSC: {
    auto *R = cast<VPReplicateRecipe>(this);
    return R->getUnderlyingInstr()->mayHaveSideEffects();
  }
  default:
    return true;
  }
}

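// Add the incoming value from the middle block to the live-out phi. Uniform
// exit values only need lane 0; all other values use the last lane of the
// final unrolled part.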
void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) {
  auto Lane = VPLane::getLastLaneForVF(State.VF);
  VPValue *ExitValue = getOperand(0);
  if (vputils::isUniformAfterVectorization(ExitValue))
    Lane = VPLane::getFirstLane();
  VPBasicBlock *MiddleVPBB =
      cast<VPBasicBlock>(Plan.getVectorLoopRegion()->getSingleSuccessor());
  assert(MiddleVPBB->getNumSuccessors() == 0 &&
         "the middle block must not have any successors");
  BasicBlock *MiddleBB = State.CFG.VPBB2IRBB[MiddleVPBB];
  Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)),
                   MiddleBB);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPLiveOut::print(raw_ostream &O, VPSlotTracker &SlotTracker) const {
  O << "Live-out ";
  getPhi()->printAsOperand(O);
  O << " = ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << "\n";
}
#endif

void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, InsertPos->getIterator());
}

void VPRecipeBase::insertBefore(VPBasicBlock &BB,
                                iplist<VPRecipeBase>::iterator I) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(I == BB.end() || I->getParent() == &BB);
  BB.insert(this, I);
}

void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
  assert(!Parent && "Recipe already in some VPBasicBlock");
  assert(InsertPos->getParent() &&
         "Insertion position not in any VPBasicBlock");
  InsertPos->getParent()->insert(this, std::next(InsertPos->getIterator()));
}

void VPRecipeBase::removeFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  getParent()->getRecipeList().remove(getIterator());
  Parent = nullptr;
}

iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
  assert(getParent() && "Recipe not in any VPBasicBlock");
  return getParent()->getRecipeList().erase(getIterator());
}

void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}

void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}

FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const {
  assert(OpType == OperationType::FPMathOp &&
         "recipe doesn't have fast math flags");
  FastMathFlags Res;
  Res.setAllowReassoc(FMFs.AllowReassoc);
  Res.setNoNaNs(FMFs.NoNaNs);
  Res.setNoInfs(FMFs.NoInfs);
  Res.setNoSignedZeros(FMFs.NoSignedZeros);
  Res.setAllowReciprocal(FMFs.AllowReciprocal);
  Res.setAllowContract(FMFs.AllowContract);
  Res.setApproxFunc(FMFs.ApproxFunc);
  return Res;
}

VPInstruction::VPInstruction(unsigned Opcode, CmpInst::Predicate Pred,
                             VPValue *A, VPValue *B, DebugLoc DL,
                             const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, ArrayRef<VPValue *>({A, B}),
                          Pred, DL),
      Opcode(Opcode), Name(Name.str()) {
  assert(Opcode == Instruction::ICmp &&
         "only ICmp predicates supported at the moment");
}

VPInstruction::VPInstruction(unsigned Opcode,
                             std::initializer_list<VPValue *> Operands,
                             FastMathFlags FMFs, DebugLoc DL, const Twine &Name)
    : VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, FMFs, DL),
      Opcode(Opcode), Name(Name.str()) {
  // Make sure the VPInstruction is a floating-point operation.
  assert(isFPMathOp() && "this op can't take fast-math flags");
}

bool VPInstruction::doesGeneratePerAllLanes() const {
  return Opcode == VPInstruction::PtrAdd && !vputils::onlyFirstLaneUsed(this);
}

bool VPInstruction::canGenerateScalarForFirstLane() const {
  if (Instruction::isBinaryOp(getOpcode()))
    return true;

  switch (Opcode) {
  case VPInstruction::BranchOnCond:
  case VPInstruction::BranchOnCount:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::ComputeReductionResult:
  case VPInstruction::PtrAdd:
  case VPInstruction::ExplicitVectorLength:
    return true;
  default:
    return false;
  }
}

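// Generate a scalar value for a single (Part, Lane) pair. Only used for
// PtrAdd at the moment, when lanes other than the first are demanded.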
Value *VPInstruction::generatePerLane(VPTransformState &State,
                                      const VPIteration &Lane) {
  IRBuilderBase &Builder = State.Builder;

  assert(getOpcode() == VPInstruction::PtrAdd &&
         "only PtrAdd opcodes are supported for now");
  return Builder.CreatePtrAdd(State.get(getOperand(0), Lane),
                              State.get(getOperand(1), Lane), Name);
}

Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {
  IRBuilderBase &Builder = State.Builder;

  if (Instruction::isBinaryOp(getOpcode())) {
    bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
    if (Part != 0 && vputils::onlyFirstPartUsed(this))
      return State.get(this, 0, OnlyFirstLaneUsed);

    Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
    Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
    auto *Res =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    if (auto *I = dyn_cast<Instruction>(Res))
      setFlags(I);
    return Res;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    Value *A = State.get(getOperand(0), Part);
    return Builder.CreateNot(A, Name);
  }
  case Instruction::ICmp: {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    return Builder.CreateCmp(getPredicate(), A, B, Name);
  }
  case Instruction::Select: {
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    return Builder.CreateSelect(Cond, Op1, Op2, Name);
  }
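  // Conceptually, lane i of part P of the mask is ((VIV[P] + i) < TC); the
  // llvm.get.active.lane.mask intrinsic computes this for all lanes at once.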
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), VPIteration(Part, 0));

    // If this part of the active lane mask is scalar, generate the CMP
    // directly to avoid unnecessary extracts.
    if (State.VF.isScalar())
      return Builder.CreateCmp(CmpInst::Predicate::ICMP_ULT, VIVElem0,
                               ScalarTC, Name);

    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    return Builder.CreateIntrinsic(Intrinsic::get_active_lane_mask,
                                   {PredTy, ScalarTC->getType()},
                                   {VIVElem0, ScalarTC}, nullptr, Name);
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    // vector.ph:
    //   v_init = vector(..., ..., ..., a[-1])
    //   br vector.body
    //
    // vector.body
    //   i = phi [0, vector.ph], [i+4, vector.body]
    //   v1 = phi [v_init, vector.ph], [v2, vector.body]
    //   v2 = a[i, i+1, i+2, i+3];
    //   v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy())
      return PartMinus1;
    Value *V2 = State.get(getOperand(1), Part);
    return Builder.CreateVectorSplice(PartMinus1, V2, -1, Name);
  }
  case VPInstruction::CalculateTripCountMinusVF: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    Value *ScalarTC = State.get(getOperand(0), {0, 0});
    Value *Step =
        createStepForVF(Builder, ScalarTC->getType(), State.VF, State.UF);
    Value *Sub = Builder.CreateSub(ScalarTC, Step);
    Value *Cmp =
        Builder.CreateICmp(CmpInst::Predicate::ICMP_UGT, ScalarTC, Step);
    Value *Zero = ConstantInt::get(ScalarTC->getType(), 0);
    return Builder.CreateSelect(Cmp, Sub, Zero);
  }
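  // The llvm.experimental.get.vector.length intrinsic below returns a value
  // that is at most AVL and at most VF * vscale, so the loop can step by the
  // returned EVL each iteration without over-running the trip count.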
  case VPInstruction::ExplicitVectorLength: {
    // Compute EVL
    auto GetEVL = [=](VPTransformState &State, Value *AVL) {
      assert(AVL->getType()->isIntegerTy() &&
             "Requested vector length should be an integer.");

      // TODO: Add support for MaxSafeDist for correct loop emission.
      assert(State.VF.isScalable() && "Expected scalable vector factor.");
      Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue());

      Value *EVL = State.Builder.CreateIntrinsic(
          State.Builder.getInt32Ty(),
          Intrinsic::experimental_get_vector_length,
          {AVL, VFArg, State.Builder.getTrue()});
      return EVL;
    };
    // TODO: Restructure this code with an explicit remainder loop, vsetvli can
    // be outside of the main loop.
    assert(Part == 0 && "No unrolling expected for predicated vectorization.");
    // Compute VTC - IV as the AVL (requested vector length).
    Value *Index = State.get(getOperand(0), VPIteration(0, 0));
    Value *TripCount = State.get(getOperand(1), VPIteration(0, 0));
    Value *AVL = State.Builder.CreateSub(TripCount, Index);
    Value *EVL = GetEVL(State, AVL);
    return EVL;
  }
  case VPInstruction::CanonicalIVIncrementForPart: {
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    if (Part == 0)
      return IV;

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    return Builder.CreateAdd(IV, Step, Name, hasNoUnsignedWrap(),
                             hasNoSignedWrap());
  }
  case VPInstruction::BranchOnCond: {
    if (Part != 0)
      return nullptr;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
  case VPInstruction::BranchOnCount: {
    if (Part != 0)
      return nullptr;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part, /*IsScalar*/ true);
    Value *TC = State.get(getOperand(1), Part, /*IsScalar*/ true);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to
    // the forward destination (the exit/middle block) later when it is
    // created. Note that CreateCondBr expects a valid BB as first argument,
    // so we need to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    return CondBr;
  }
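  // ComputeReductionResult combines the per-part reduction values after the
  // vector loop: truncate to the narrower reduction type if needed, fold the
  // UF parts into one value, then emit the final target reduction and any
  // required extension or intermediate store.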
  case VPInstruction::ComputeReductionResult: {
    if (Part != 0)
      return State.get(this, 0, /*IsScalar*/ true);

    // FIXME: The cross-recipe dependency on VPReductionPHIRecipe is temporary
    // and will be removed by breaking up the recipe further.
    auto *PhiR = cast<VPReductionPHIRecipe>(getOperand(0));
    auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
    // Get its reduction variable descriptor.
    const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();

    RecurKind RK = RdxDesc.getRecurrenceKind();

    VPValue *LoopExitingDef = getOperand(1);
    Type *PhiTy = OrigPhi->getType();
    VectorParts RdxParts(State.UF);
    for (unsigned Part = 0; Part < State.UF; ++Part)
      RdxParts[Part] = State.get(LoopExitingDef, Part, PhiR->isInLoop());

    // If the vector reduction can be performed in a smaller type, we truncate
    // then extend the loop exit value to enable InstCombine to evaluate the
    // entire expression in the smaller type.
    // TODO: Handle this in truncateToMinBW.
    if (State.VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
      Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), State.VF);
      for (unsigned Part = 0; Part < State.UF; ++Part)
        RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
    }
    // Reduce all of the unrolled parts into a single vector.
    Value *ReducedPartRdx = RdxParts[0];
    unsigned Op = RecurrenceDescriptor::getOpcode(RK);

    if (PhiR->isOrdered()) {
      ReducedPartRdx = RdxParts[State.UF - 1];
    } else {
      // Floating-point operations should have some FMF to enable the
      // reduction.
      IRBuilderBase::FastMathFlagGuard FMFG(Builder);
      Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
      for (unsigned Part = 1; Part < State.UF; ++Part) {
        Value *RdxPart = RdxParts[Part];
        if (Op != Instruction::ICmp && Op != Instruction::FCmp)
          ReducedPartRdx = Builder.CreateBinOp(
              (Instruction::BinaryOps)Op, RdxPart, ReducedPartRdx, "bin.rdx");
        else if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK)) {
          TrackingVH<Value> ReductionStartValue =
              RdxDesc.getRecurrenceStartValue();
          ReducedPartRdx = createAnyOfOp(Builder, ReductionStartValue, RK,
                                         ReducedPartRdx, RdxPart);
        } else
          ReducedPartRdx = createMinMaxOp(Builder, RK, ReducedPartRdx, RdxPart);
      }
    }

    // Create the reduction after the loop. Note that inloop reductions create
    // the target reduction in the loop using a Reduction recipe.
    if (State.VF.isVector() && !PhiR->isInLoop()) {
      ReducedPartRdx =
          createTargetReduction(Builder, RdxDesc, ReducedPartRdx, OrigPhi);
      // If the reduction can be performed in a smaller type, we need to extend
      // the reduction to the wider type before we branch to the original loop.
      if (PhiTy != RdxDesc.getRecurrenceType())
        ReducedPartRdx = RdxDesc.isSigned()
                             ? Builder.CreateSExt(ReducedPartRdx, PhiTy)
                             : Builder.CreateZExt(ReducedPartRdx, PhiTy);
    }

    // If there were stores of the reduction value to a uniform memory address
    // inside the loop, create the final store here.
    if (StoreInst *SI = RdxDesc.IntermediateStore) {
      auto *NewSI = Builder.CreateAlignedStore(
          ReducedPartRdx, SI->getPointerOperand(), SI->getAlign());
      propagateMetadata(NewSI, SI);
    }

    return ReducedPartRdx;
  }
  case VPInstruction::PtrAdd: {
    assert(vputils::onlyFirstLaneUsed(this) &&
           "can only generate first lane for PtrAdd");
    Value *Ptr = State.get(getOperand(0), Part, /* IsScalar */ true);
    Value *Addend = State.get(getOperand(1), Part, /* IsScalar */ true);
    return Builder.CreatePtrAdd(Ptr, Addend, Name);
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}

#if !defined(NDEBUG)
bool VPInstruction::isFPMathOp() const {
  // Inspired by FPMathOperator::classof. Notable differences are that we don't
  // support Call, PHI and Select opcodes here yet.
  return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
         Opcode == Instruction::FNeg || Opcode == Instruction::FSub ||
         Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
         Opcode == Instruction::FCmp || Opcode == Instruction::Select;
}
#endif

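// Generate the VPInstruction: depending on the opcode and demanded lanes this
// emits one value per (part, lane), one scalar per part (first lane only), or
// one vector per part.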
void VPInstruction::execute(VPTransformState &State) {
  assert(!State.Instance && "VPInstruction executing an Instance");
  IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder);
  assert((hasFastMathFlags() == isFPMathOp() ||
          getOpcode() == Instruction::Select) &&
         "Recipe not a FPMathOp but has fast-math flags?");
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());
  State.setDebugLocFrom(getDebugLoc());
  bool GeneratesPerFirstLaneOnly =
      canGenerateScalarForFirstLane() &&
      (vputils::onlyFirstLaneUsed(this) ||
       getOpcode() == VPInstruction::ComputeReductionResult);
  bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (GeneratesPerAllLanes) {
      for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
           Lane != NumLanes; ++Lane) {
        Value *GeneratedValue = generatePerLane(State, VPIteration(Part, Lane));
        assert(GeneratedValue && "generatePerLane must produce a value");
        State.set(this, GeneratedValue, VPIteration(Part, Lane));
      }
      continue;
    }

    Value *GeneratedValue = generatePerPart(State, Part);
    if (!hasResult())
      continue;
    assert(GeneratedValue && "generatePerPart must produce a value");
    assert((GeneratedValue->getType()->isVectorTy() ==
                !GeneratesPerFirstLaneOnly ||
            State.VF.isScalar()) &&
           "scalar value but not only first lane defined");
    State.set(this, GeneratedValue, Part,
              /*IsScalar*/ GeneratesPerFirstLaneOnly);
  }
}

bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
  assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
  if (Instruction::isBinaryOp(getOpcode()))
    return vputils::onlyFirstLaneUsed(this);

  switch (getOpcode()) {
  default:
    return false;
  case Instruction::ICmp:
  case VPInstruction::PtrAdd:
    // TODO: Cover additional opcodes.
    return vputils::onlyFirstLaneUsed(this);
  case VPInstruction::ActiveLaneMask:
  case VPInstruction::ExplicitVectorLength:
  case VPInstruction::CalculateTripCountMinusVF:
  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::BranchOnCount:
    return true;
  };
  llvm_unreachable("switch should return");
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstruction::dump() const {
  VPSlotTracker SlotTracker(getParent()->getPlan());
  print(dbgs(), "", SlotTracker);
}

void VPInstruction::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";

  if (hasResult()) {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  switch (getOpcode()) {
  case VPInstruction::Not:
    O << "not";
    break;
  case VPInstruction::SLPLoad:
    O << "combined load";
    break;
  case VPInstruction::SLPStore:
    O << "combined store";
    break;
  case VPInstruction::ActiveLaneMask:
    O << "active lane mask";
    break;
  case VPInstruction::ExplicitVectorLength:
    O << "EXPLICIT-VECTOR-LENGTH";
    break;
  case VPInstruction::FirstOrderRecurrenceSplice:
    O << "first-order splice";
    break;
  case VPInstruction::BranchOnCond:
    O << "branch-on-cond";
    break;
  case VPInstruction::CalculateTripCountMinusVF:
    O << "TC > VF ? TC - VF : 0";
    break;
  case VPInstruction::CanonicalIVIncrementForPart:
    O << "VF * Part +";
    break;
  case VPInstruction::BranchOnCount:
    O << "branch-on-count";
    break;
  case VPInstruction::ComputeReductionResult:
    O << "compute-reduction-result";
    break;
  case VPInstruction::PtrAdd:
    O << "ptradd";
    break;
  default:
    O << Instruction::getOpcodeName(getOpcode());
  }

  printFlags(O);
  printOperands(O, SlotTracker);

  if (auto DL = getDebugLoc()) {
    O << ", !dbg ";
    DL.print(O);
  }
}
#endif

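// Widen a call by emitting either a vector intrinsic or a vectorized library
// function variant; operands that must remain scalar (intrinsic scalar
// arguments, linear pointer parameters) are passed through unwidened.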
void VPWidenCallRecipe::execute(VPTransformState &State) {
  assert(State.VF.isVector() && "not widening");
  auto &CI = *cast<CallInst>(getUnderlyingInstr());
  assert(!isa<DbgInfoIntrinsic>(CI) &&
         "DbgInfoIntrinsic should have been dropped during VPlan construction");
  State.setDebugLocFrom(getDebugLoc());

  bool UseIntrinsic = VectorIntrinsicID != Intrinsic::not_intrinsic;
  FunctionType *VFTy = nullptr;
  if (Variant)
    VFTy = Variant->getFunctionType();
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    SmallVector<Type *, 2> TysForDecl;
    // Add return type if intrinsic is overloaded on it.
    if (UseIntrinsic &&
        isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1))
      TysForDecl.push_back(
          VectorType::get(CI.getType()->getScalarType(), State.VF));
    SmallVector<Value *, 4> Args;
    for (const auto &I : enumerate(operands())) {
      // Some intrinsics have a scalar argument - don't replace it with a
      // vector.
      Value *Arg;
      if (UseIntrinsic &&
          isVectorIntrinsicWithScalarOpAtArg(VectorIntrinsicID, I.index()))
        Arg = State.get(I.value(), VPIteration(0, 0));
      // Some vectorized function variants may also take a scalar argument,
      // e.g. linear parameters for pointers. This needs to be the scalar value
      // from the start of the respective part when interleaving.
      else if (VFTy && !VFTy->getParamType(I.index())->isVectorTy())
        Arg = State.get(I.value(), VPIteration(Part, 0));
      else
        Arg = State.get(I.value(), Part);
      if (UseIntrinsic &&
          isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, I.index()))
        TysForDecl.push_back(Arg->getType());
      Args.push_back(Arg);
    }

    Function *VectorF;
    if (UseIntrinsic) {
      // Use vector version of the intrinsic.
      Module *M = State.Builder.GetInsertBlock()->getModule();
      VectorF = Intrinsic::getDeclaration(M, VectorIntrinsicID, TysForDecl);
      assert(VectorF && "Can't retrieve vector intrinsic.");
    } else {
#ifndef NDEBUG
      assert(Variant != nullptr && "Can't create vector function.");
#endif
      VectorF = Variant;
    }

    SmallVector<OperandBundleDef, 1> OpBundles;
    CI.getOperandBundlesAsDefs(OpBundles);
    CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);

    if (isa<FPMathOperator>(V))
      V->copyFastMathFlags(&CI);

    if (!V->getType()->isVoidTy())
      State.set(this, V, Part);
    State.addMetadata(V, &CI);
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CALL ";

  auto *CI = cast<CallInst>(getUnderlyingInstr());
  if (CI->getType()->isVoidTy())
    O << "void ";
  else {
    printAsOperand(O, SlotTracker);
    O << " = ";
  }

  O << "call @" << CI->getCalledFunction()->getName() << "(";
  printOperands(O, SlotTracker);
  O << ")";

  if (VectorIntrinsicID)
    O << " (using vector intrinsic)";
  else {
    O << " (using library function";
    if (Variant->hasName())
      O << ": " << Variant->getName();
    O << ")";
  }
}

void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-SELECT ";
  printAsOperand(O, SlotTracker);
  O << " = select ";
  getOperand(0)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << ", ";
  getOperand(2)->printAsOperand(O, SlotTracker);
  O << (isInvariantCond() ? " (condition is loop invariant)" : "");
}
#endif

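// Widen select: a loop-invariant condition is used as a scalar first-lane
// value, while a varying condition is widened per part.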
void VPWidenSelectRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());

  // The condition can be loop invariant but still defined inside the
  // loop. This means that we can't just use the original 'cond' value.
  // We have to take the 'vectorized' value and pick the first lane.
  // Instcombine will make this a no-op.
  auto *InvarCond =
      isInvariantCond() ? State.get(getCond(), VPIteration(0, 0)) : nullptr;

  for (unsigned Part = 0; Part < State.UF; ++Part) {
    Value *Cond = InvarCond ? InvarCond : State.get(getCond(), Part);
    Value *Op0 = State.get(getOperand(1), Part);
    Value *Op1 = State.get(getOperand(2), Part);
    Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1);
    State.set(this, Sel, Part);
    State.addMetadata(Sel, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

VPRecipeWithIRFlags::FastMathFlagsTy::FastMathFlagsTy(
    const FastMathFlags &FMF) {
  AllowReassoc = FMF.allowReassoc();
  NoNaNs = FMF.noNaNs();
  NoInfs = FMF.noInfs();
  NoSignedZeros = FMF.noSignedZeros();
  AllowReciprocal = FMF.allowReciprocal();
  AllowContract = FMF.allowContract();
  ApproxFunc = FMF.approxFunc();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPRecipeWithIRFlags::printFlags(raw_ostream &O) const {
  switch (OpType) {
  case OperationType::Cmp:
    O << " " << CmpInst::getPredicateName(getPredicate());
    break;
  case OperationType::DisjointOp:
    if (DisjointFlags.IsDisjoint)
      O << " disjoint";
    break;
  case OperationType::PossiblyExactOp:
    if (ExactFlags.IsExact)
      O << " exact";
    break;
  case OperationType::OverflowingBinOp:
    if (WrapFlags.HasNUW)
      O << " nuw";
    if (WrapFlags.HasNSW)
      O << " nsw";
    break;
  case OperationType::FPMathOp:
    getFastMathFlags().print(O);
    break;
  case OperationType::GEPOp:
    if (GEPFlags.IsInBounds)
      O << " inbounds";
    break;
  case OperationType::NonNegOp:
    if (NonNegFlags.NonNeg)
      O << " nneg";
    break;
  case OperationType::Other:
    break;
  }
  if (getNumOperands() > 0)
    O << " ";
}
#endif

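// Widen simple unary/binary ops, freeze and compares by re-emitting them with
// vector operands, one instruction per unrolled part, propagating IR flags
// and metadata from the original instruction.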
void VPWidenRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  switch (Opcode) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(Opcode, Ops);

      if (auto *VecOp = dyn_cast<Instruction>(V))
        setFlags(VecOp);

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  case Instruction::Freeze: {
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = Opcode == Instruction::FCmp;
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        if (auto *I = dyn_cast_or_null<Instruction>(getUnderlyingValue()))
          Builder.setFastMathFlags(I->getFastMathFlags());
        C = Builder.CreateFCmp(getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, dyn_cast_or_null<Instruction>(getUnderlyingValue()));
    }

    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled opcode : "
                      << Instruction::getOpcodeName(Opcode));
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.

#if !defined(NDEBUG)
  // Verify that VPlan type inference results agree with the type of the
  // generated values.
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    assert(VectorType::get(State.TypeAnalysis.inferScalarType(this),
                           State.VF) == State.get(this, Part)->getType() &&
           "inferred type and type from generated instructions do not match");
  }
#endif
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
#endif

void VPWidenCastRecipe::execute(VPTransformState &State) {
  State.setDebugLocFrom(getDebugLoc());
  auto &Builder = State.Builder;
  /// Vectorize casts.
  assert(State.VF.isVector() && "Not vectorizing?");
  Type *DestTy = VectorType::get(getResultType(), State.VF);
  VPValue *Op = getOperand(0);
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    if (Part > 0 && Op->isLiveIn()) {
      // FIXME: Remove once explicit unrolling is implemented using VPlan.
      State.set(this, State.get(this, 0), Part);
      continue;
    }
    Value *A = State.get(Op, Part);
    Value *Cast = Builder.CreateCast(Instruction::CastOps(Opcode), A, DestTy);
    State.set(this, Cast, Part);
    State.addMetadata(Cast, cast_or_null<Instruction>(getUnderlyingValue()));
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenCastRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-CAST ";
  printAsOperand(O, SlotTracker);
  O << " = " << Instruction::getOpcodeName(Opcode) << " ";
  printFlags(O);
  printOperands(O, SlotTracker);
  O << " to " << *getResultType();
}
#endif

/// This function adds
/// (StartIdx * Step, (StartIdx + 1) * Step, (StartIdx + 2) * Step, ...)
/// to each vector element of Val. The sequence starts at StartIdx.
/// \p BinOp is relevant for FP induction variables.
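/// For example, for an integer induction with VF = 4, Val = <a, a, a, a>,
/// StartIdx = 0 and Step = s, the result is <a, a + s, a + 2*s, a + 3*s>.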
static Value *getStepVector(Value *Val, Value *StartIdx, Value *Step,
                            Instruction::BinaryOps BinOp, ElementCount VF,
                            IRBuilderBase &Builder) {
  assert(VF.isVector() && "only vector VFs are supported");

  // Create and check the types.
  auto *ValVTy = cast<VectorType>(Val->getType());
  ElementCount VLen = ValVTy->getElementCount();

  Type *STy = Val->getType()->getScalarType();
  assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
         "Induction Step must be an integer or FP");
  assert(Step->getType() == STy && "Step has wrong type");

  SmallVector<Constant *, 8> Indices;

  // Create a vector of consecutive numbers from zero to VF.
  VectorType *InitVecValVTy = ValVTy;
  if (STy->isFloatingPointTy()) {
    Type *InitVecValSTy =
        IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
    InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
  }
  Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

  // Splat the StartIdx.
  Value *StartIdxSplat = Builder.CreateVectorSplat(VLen, StartIdx);

  if (STy->isIntegerTy()) {
    InitVec = Builder.CreateAdd(InitVec, StartIdxSplat);
    Step = Builder.CreateVectorSplat(VLen, Step);
    assert(Step->getType() == Val->getType() && "Invalid step vec");
    // FIXME: The newly created binary instructions should contain nsw/nuw
    // flags, which can be found from the original scalar operations.
    Step = Builder.CreateMul(InitVec, Step);
    return Builder.CreateAdd(Val, Step, "induction");
  }

  // Floating point induction.
  assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
         "Binary Opcode should be specified for FP induction");
  InitVec = Builder.CreateUIToFP(InitVec, ValVTy);
  InitVec = Builder.CreateFAdd(InitVec, StartIdxSplat);

  Step = Builder.CreateVectorSplat(VLen, Step);
  Value *MulOp = Builder.CreateFMul(InitVec, Step);
  return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}

/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
  return Ty->isIntegerTy() ? ConstantInt::getSigned(Ty, C)
                           : ConstantFP::get(Ty, C);
}

static Value *getRuntimeVFAsFloat(IRBuilderBase &B, Type *FTy,
                                  ElementCount VF) {
  assert(FTy->isFloatingPointTy() && "Expected floating point type!");
  Type *IntTy =
      IntegerType::get(FTy->getContext(), FTy->getScalarSizeInBits());
  Value *RuntimeVF = getRuntimeVF(B, IntTy, VF);
  return B.CreateUIToFP(RuntimeVF, FTy);
}

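// Create the widened induction: a vector phi seeded in the preheader with
// <Start, Start + Step, ..., Start + (VF-1) * Step>; each unrolled part is
// the previous part advanced by a splat of VF * Step.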
void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
  assert(!State.Instance && "Int or FP induction being replicated.");

  Value *Start = getStartValue()->getLiveInIRValue();
  const InductionDescriptor &ID = getInductionDescriptor();
  TruncInst *Trunc = getTruncInst();
  IRBuilderBase &Builder = State.Builder;
  assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
  assert(State.VF.isVector() && "must have vector VF");

  // The value from the original loop to which we are mapping the new induction
  // variable.
  Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;

  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
    Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());

  // Now do the actual transformations, and start with fetching the step value.
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));

  assert((isa<PHINode>(EntryVal) || isa<TruncInst>(EntryVal)) &&
         "Expected either an induction phi-node or a truncate of it!");

  // Construct the initial value of the vector IV in the vector loop preheader.
  auto CurrIP = Builder.saveIP();
  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);
  Builder.SetInsertPoint(VectorPH->getTerminator());
  if (isa<TruncInst>(EntryVal)) {
    assert(Start->getType()->isIntegerTy() &&
           "Truncation requires an integer type");
    auto *TruncType = cast<IntegerType>(EntryVal->getType());
    Step = Builder.CreateTrunc(Step, TruncType);
    Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
  }

  Value *Zero = getSignedIntOrFpConstant(Start->getType(), 0);
  Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
  Value *SteppedStart = getStepVector(
      SplatStart, Zero, Step, ID.getInductionOpcode(), State.VF, State.Builder);

  // We create vector phi nodes for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (Step->getType()->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = ID.getInductionOpcode();
    MulOp = Instruction::FMul;
  }

  // Multiply the vectorization factor by the step using integer or
  // floating-point arithmetic as appropriate.
  Type *StepType = Step->getType();
  Value *RuntimeVF;
  if (Step->getType()->isFloatingPointTy())
    RuntimeVF = getRuntimeVFAsFloat(Builder, StepType, State.VF);
  else
    RuntimeVF = getRuntimeVF(Builder, StepType, State.VF);
  Value *Mul = Builder.CreateBinOp(MulOp, Step, RuntimeVF);

  // Create a vector splat to use in the induction update.
  //
  // FIXME: If the step is non-constant, we create the vector splat with
  // IRBuilder. IRBuilder can constant-fold the multiply, but it doesn't
  // handle a constant vector splat.
  Value *SplatVF = isa<Constant>(Mul)
                       ? ConstantVector::getSplat(State.VF, cast<Constant>(Mul))
                       : Builder.CreateVectorSplat(State.VF, Mul);
  Builder.restoreIP(CurrIP);

  // We may need to add the step a number of times, depending on the unroll
  // factor. The last of those goes into the PHI.
  PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind");
  VecInd->insertBefore(State.CFG.PrevBB->getFirstInsertionPt());
  VecInd->setDebugLoc(EntryVal->getDebugLoc());
  Instruction *LastInduction = VecInd;
  for (unsigned Part = 0; Part < State.UF; ++Part) {
    State.set(this, LastInduction, Part);

    if (isa<TruncInst>(EntryVal))
      State.addMetadata(LastInduction, EntryVal);

    LastInduction = cast<Instruction>(
        Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
    LastInduction->setDebugLoc(EntryVal->getDebugLoc());
  }

  LastInduction->setName("vec.ind.next");
  VecInd->addIncoming(SteppedStart, VectorPH);
  // Add induction update using an incorrect block temporarily. The phi node
  // will be fixed after VPlan execution. Note that at this point the latch
  // block cannot be used, as it does not exist yet.
  // TODO: Model increment value in VPlan, by turning the recipe into a
  // multi-def and a subclass of VPHeaderPHIRecipe.
  VecInd->addIncoming(LastInduction, VectorPH);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-INDUCTION";
  if (getTruncInst()) {
    O << "\\l\"";
    O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\"";
    O << " +\n" << Indent << "\" ";
    getVPValue(0)->printAsOperand(O, SlotTracker);
  } else
    O << " " << VPlanIngredient(IV);

  O << ", ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
  // The step may be defined by a recipe in the preheader (e.g. if it requires
  // SCEV expansion), but for the canonical induction the step is required to
  // be 1, which is represented as a live-in.
  if (getStepValue()->getDefiningRecipe())
    return false;
  auto *StepC = dyn_cast<ConstantInt>(getStepValue()->getLiveInIRValue());
  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
  return StartC && StartC->isZero() && StepC && StepC->isOne();
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = DERIVED-IV ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << " + ";
  getOperand(1)->printAsOperand(O, SlotTracker);
  O << " * ";
  getStepValue()->printAsOperand(O, SlotTracker);
}
#endif

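// Materialize the scalar steps BaseIV + (Part * VF + Lane) * Step for all
// demanded lanes; for scalable VFs a stepvector-based vector form is emitted
// as well.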
void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
  // Fast-math-flags propagate from the original induction instruction.
  IRBuilder<>::FastMathFlagGuard FMFG(State.Builder);
  if (hasFastMathFlags())
    State.Builder.setFastMathFlags(getFastMathFlags());

  /// Compute scalar induction steps. \p ScalarIV is the scalar induction
  /// variable on which to base the steps, \p Step is the size of the step.

  Value *BaseIV = State.get(getOperand(0), VPIteration(0, 0));
  Value *Step = State.get(getStepValue(), VPIteration(0, 0));
  IRBuilderBase &Builder = State.Builder;

  // Ensure step has the same type as that of scalar IV.
  Type *BaseIVTy = BaseIV->getType()->getScalarType();
  assert(BaseIVTy == Step->getType() && "Types of BaseIV and Step must match!");

  // We build scalar steps for both integer and floating-point induction
  // variables. Here, we determine the kind of arithmetic we will perform.
  Instruction::BinaryOps AddOp;
  Instruction::BinaryOps MulOp;
  if (BaseIVTy->isIntegerTy()) {
    AddOp = Instruction::Add;
    MulOp = Instruction::Mul;
  } else {
    AddOp = InductionOpcode;
    MulOp = Instruction::FMul;
  }

  // Determine the number of scalars we need to generate for each unroll
  // iteration.
  bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
  // Compute the scalar steps and save the results in State.
  Type *IntStepTy =
      IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
  Type *VecIVTy = nullptr;
  Value *UnitStepVec = nullptr, *SplatStep = nullptr, *SplatIV = nullptr;
  if (!FirstLaneOnly && State.VF.isScalable()) {
    VecIVTy = VectorType::get(BaseIVTy, State.VF);
    UnitStepVec =
        Builder.CreateStepVector(VectorType::get(IntStepTy, State.VF));
    SplatStep = Builder.CreateVectorSplat(State.VF, Step);
    SplatIV = Builder.CreateVectorSplat(State.VF, BaseIV);
  }

  unsigned StartPart = 0;
  unsigned EndPart = State.UF;
  unsigned StartLane = 0;
  unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
  if (State.Instance) {
    StartPart = State.Instance->Part;
    EndPart = StartPart + 1;
    StartLane = State.Instance->Lane.getKnownLane();
    EndLane = StartLane + 1;
  }
  for (unsigned Part = StartPart; Part < EndPart; ++Part) {
    Value *StartIdx0 = createStepForVF(Builder, IntStepTy, State.VF, Part);

    if (!FirstLaneOnly && State.VF.isScalable()) {
      auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);
      auto *InitVec = Builder.CreateAdd(SplatStartIdx, UnitStepVec);
      if (BaseIVTy->isFloatingPointTy())
        InitVec = Builder.CreateSIToFP(InitVec, VecIVTy);
      auto *Mul = Builder.CreateBinOp(MulOp, InitVec, SplatStep);
      auto *Add = Builder.CreateBinOp(AddOp, SplatIV, Mul);
      State.set(this, Add, Part);
      // It's useful to record the lane values too for the known minimum number
      // of elements so we do those below. This improves the code quality when
      // trying to extract the first element, for example.
    }

    if (BaseIVTy->isFloatingPointTy())
      StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);

    for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
      Value *StartIdx = Builder.CreateBinOp(
          AddOp, StartIdx0, getSignedIntOrFpConstant(BaseIVTy, Lane));
      // The step returned by `createStepForVF` is a runtime-evaluated value
      // when VF is scalable. Otherwise, it should be folded into a Constant.
      assert((State.VF.isScalable() || isa<Constant>(StartIdx)) &&
             "Expected StartIdx to be folded to a constant when VF is not "
             "scalable");
      auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
      auto *Add = Builder.CreateBinOp(AddOp, BaseIV, Mul);
      State.set(this, Add, VPIteration(Part, Lane));
    }
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent,
                                  VPSlotTracker &SlotTracker) const {
  O << Indent;
  printAsOperand(O, SlotTracker);
  O << " = SCALAR-STEPS ";
  printOperands(O, SlotTracker);
}
#endif

1329 | void VPWidenGEPRecipe::execute(VPTransformState &State) { |
1330 | assert(State.VF.isVector() && "not widening" ); |
1331 | auto *GEP = cast<GetElementPtrInst>(Val: getUnderlyingInstr()); |
1332 | // Construct a vector GEP by widening the operands of the scalar GEP as |
1333 | // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP |
1334 | // results in a vector of pointers when at least one operand of the GEP |
1335 | // is vector-typed. Thus, to keep the representation compact, we only use |
1336 | // vector-typed operands for loop-varying values. |
1337 | |
1338 | if (areAllOperandsInvariant()) { |
1339 | // If we are vectorizing, but the GEP has only loop-invariant operands, |
1340 | // the GEP we build (by only using vector-typed operands for |
1341 | // loop-varying values) would be a scalar pointer. Thus, to ensure we |
1342 | // produce a vector of pointers, we need to either arbitrarily pick an |
1343 | // operand to broadcast, or broadcast a clone of the original GEP. |
1344 | // Here, we broadcast a clone of the original. |
1345 | // |
1346 | // TODO: If at some point we decide to scalarize instructions having |
1347 | // loop-invariant operands, this special case will no longer be |
1348 | // required. We would add the scalarization decision to |
1349 | // collectLoopScalars() and teach getVectorValue() to broadcast |
1350 | // the lane-zero scalar value. |
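    // Sketch of the IR this is expected to produce for VF = 4 (illustrative
    // names, not actual output):
    //   %gep    = getelementptr inbounds i32, ptr %base, i64 %inv.idx
    //   %insert = insertelement <4 x ptr> poison, ptr %gep, i64 0
    //   %splat  = shufflevector <4 x ptr> %insert, <4 x ptr> poison,
    //             <4 x i32> zeroinitializer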
1351 | SmallVector<Value *> Ops; |
1352 | for (unsigned I = 0, E = getNumOperands(); I != E; I++) |
1353 | Ops.push_back(Elt: State.get(Def: getOperand(N: I), Instance: VPIteration(0, 0))); |
1354 | |
1355 | auto *NewGEP = |
1356 | State.Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr: Ops[0], |
1357 | IdxList: ArrayRef(Ops).drop_front(), Name: "" , IsInBounds: isInBounds()); |
1358 | for (unsigned Part = 0; Part < State.UF; ++Part) { |
1359 | Value *EntryPart = State.Builder.CreateVectorSplat(EC: State.VF, V: NewGEP); |
1360 | State.set(Def: this, V: EntryPart, Part); |
1361 | State.addMetadata(To: EntryPart, From: GEP); |
1362 | } |
1363 | } else { |
1364 | // If the GEP has at least one loop-varying operand, we are sure to |
1365 | // produce a vector of pointers. But if we are only unrolling, we want |
1366 | // to produce a scalar GEP for each unroll part. Thus, the GEP we |
1367 | // produce with the code below will be scalar (if VF == 1) or vector |
1368 | // (otherwise). Note that for the unroll-only case, we still maintain |
1369 | // values in the vector mapping with initVector, as we do for other |
1370 | // instructions. |
1371 | for (unsigned Part = 0; Part < State.UF; ++Part) { |
1372 | // The pointer operand of the new GEP. If it's loop-invariant, we |
1373 | // won't broadcast it. |
1374 | auto *Ptr = isPointerLoopInvariant() |
1375 | ? State.get(Def: getOperand(N: 0), Instance: VPIteration(0, 0)) |
1376 | : State.get(Def: getOperand(N: 0), Part); |
1377 | |
1378 | // Collect all the indices for the new GEP. If any index is |
1379 | // loop-invariant, we won't broadcast it. |
1380 | SmallVector<Value *, 4> Indices; |
1381 | for (unsigned I = 1, E = getNumOperands(); I < E; I++) { |
1382 | VPValue *Operand = getOperand(N: I); |
1383 | if (isIndexLoopInvariant(I: I - 1)) |
1384 | Indices.push_back(Elt: State.get(Def: Operand, Instance: VPIteration(0, 0))); |
1385 | else |
1386 | Indices.push_back(Elt: State.get(Def: Operand, Part)); |
1387 | } |
1388 | |
      // Create the new GEP. Note that this GEP is a scalar if VF == 1, and a
      // vector otherwise.
1391 | auto *NewGEP = State.Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr, |
1392 | IdxList: Indices, Name: "" , IsInBounds: isInBounds()); |
1393 | assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) && |
1394 | "NewGEP is not a pointer vector" ); |
1395 | State.set(Def: this, V: NewGEP, Part); |
1396 | State.addMetadata(To: NewGEP, From: GEP); |
1397 | } |
1398 | } |
1399 | } |
1400 | |
1401 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1402 | void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, |
1403 | VPSlotTracker &SlotTracker) const { |
1404 | O << Indent << "WIDEN-GEP " ; |
1405 | O << (isPointerLoopInvariant() ? "Inv" : "Var" ); |
1406 | for (size_t I = 0; I < getNumOperands() - 1; ++I) |
1407 | O << "[" << (isIndexLoopInvariant(I) ? "Inv" : "Var" ) << "]" ; |
1408 | |
1409 | O << " " ; |
1410 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1411 | O << " = getelementptr" ; |
1412 | printFlags(O); |
1413 | printOperands(O, SlotTracker); |
1414 | } |
1415 | #endif |
1416 | |
void VPVectorPointerRecipe::execute(VPTransformState &State) {
1418 | auto &Builder = State.Builder; |
1419 | State.setDebugLocFrom(getDebugLoc()); |
1420 | for (unsigned Part = 0; Part < State.UF; ++Part) { |
1421 | // Calculate the pointer for the specific unroll-part. |
1422 | Value *PartPtr = nullptr; |
1423 | // Use i32 for the gep index type when the value is constant, |
1424 | // or query DataLayout for a more suitable index type otherwise. |
1425 | const DataLayout &DL = |
1426 | Builder.GetInsertBlock()->getModule()->getDataLayout(); |
1427 | Type *IndexTy = State.VF.isScalable() && (IsReverse || Part > 0) |
1428 | ? DL.getIndexType(PtrTy: IndexedTy->getPointerTo()) |
1429 | : Builder.getInt32Ty(); |
1430 | Value *Ptr = State.get(Def: getOperand(N: 0), Instance: VPIteration(0, 0)); |
1431 | bool InBounds = isInBounds(); |
1432 | if (IsReverse) { |
1433 | // If the address is consecutive but reversed, then the |
1434 | // wide store needs to start at the last vector element. |
      // RunTimeVF = VScale * VF.getKnownMinValue()
      // For fixed-width vectors VScale is 1, so RunTimeVF = VF.getKnownMinValue().
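      // Worked example (a fixed-width sketch): with VF = 4, RunTimeVF = 4, so
      // Part 1 gets NumElt = -4 and LastLane = -3; the two GEPs below then
      // address Ptr - 7, the lowest element covered by the reversed part.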
1437 | Value *RunTimeVF = getRuntimeVF(B&: Builder, Ty: IndexTy, VF: State.VF); |
1438 | // NumElt = -Part * RunTimeVF |
1439 | Value *NumElt = Builder.CreateMul( |
1440 | LHS: ConstantInt::get(Ty: IndexTy, V: -(int64_t)Part), RHS: RunTimeVF); |
1441 | // LastLane = 1 - RunTimeVF |
1442 | Value *LastLane = |
1443 | Builder.CreateSub(LHS: ConstantInt::get(Ty: IndexTy, V: 1), RHS: RunTimeVF); |
1444 | PartPtr = Builder.CreateGEP(Ty: IndexedTy, Ptr, IdxList: NumElt, Name: "" , IsInBounds: InBounds); |
1445 | PartPtr = Builder.CreateGEP(Ty: IndexedTy, Ptr: PartPtr, IdxList: LastLane, Name: "" , IsInBounds: InBounds); |
1446 | } else { |
1447 | Value *Increment = createStepForVF(B&: Builder, Ty: IndexTy, VF: State.VF, Step: Part); |
1448 | PartPtr = Builder.CreateGEP(Ty: IndexedTy, Ptr, IdxList: Increment, Name: "" , IsInBounds: InBounds); |
1449 | } |
1450 | |
1451 | State.set(Def: this, V: PartPtr, Part, /*IsScalar*/ true); |
1452 | } |
1453 | } |
1454 | |
1455 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1456 | void VPVectorPointerRecipe::print(raw_ostream &O, const Twine &Indent, |
1457 | VPSlotTracker &SlotTracker) const { |
1458 | O << Indent; |
1459 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1460 | O << " = vector-pointer " ; |
1461 | if (IsReverse) |
1462 | O << "(reverse) " ; |
1463 | |
1464 | printOperands(O, SlotTracker); |
1465 | } |
1466 | #endif |
1467 | |
1468 | void VPBlendRecipe::execute(VPTransformState &State) { |
1469 | State.setDebugLocFrom(getDebugLoc()); |
1470 | // We know that all PHIs in non-header blocks are converted into |
1471 | // selects, so we don't have to worry about the insertion order and we |
1472 | // can just use the builder. |
1473 | // At this point we generate the predication tree. There may be |
1474 | // duplications since this is a simple recursive scan, but future |
1475 | // optimizations will clean it up. |
1476 | |
1477 | unsigned NumIncoming = getNumIncomingValues(); |
1478 | |
1479 | // Generate a sequence of selects of the form: |
1480 | // SELECT(Mask3, In3, |
1481 | // SELECT(Mask2, In2, |
1482 | // SELECT(Mask1, In1, |
1483 | // In0))) |
  // Note that Mask0 is never used: lanes for which no path reaches this phi
  // (and which are essentially undef) take their value from In0.
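  // For instance (a sketch with hypothetical names), with two incoming values
  // and VF = 4 the loop below degenerates to a single select per part:
  //   %predphi = select <4 x i1> %mask1, <4 x i32> %in1, <4 x i32> %in0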
1486 | VectorParts Entry(State.UF); |
1487 | for (unsigned In = 0; In < NumIncoming; ++In) { |
1488 | for (unsigned Part = 0; Part < State.UF; ++Part) { |
      // We might have single-edge PHIs (blocks); use an identity 'select' for
      // the first PHI operand.
1491 | Value *In0 = State.get(Def: getIncomingValue(Idx: In), Part); |
1492 | if (In == 0) |
1493 | Entry[Part] = In0; // Initialize with the first incoming value. |
1494 | else { |
1495 | // Select between the current value and the previous incoming edge |
1496 | // based on the incoming mask. |
1497 | Value *Cond = State.get(Def: getMask(Idx: In), Part); |
1498 | Entry[Part] = |
1499 | State.Builder.CreateSelect(C: Cond, True: In0, False: Entry[Part], Name: "predphi" ); |
1500 | } |
1501 | } |
1502 | } |
1503 | for (unsigned Part = 0; Part < State.UF; ++Part) |
1504 | State.set(Def: this, V: Entry[Part], Part); |
1505 | } |
1506 | |
1507 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1508 | void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, |
1509 | VPSlotTracker &SlotTracker) const { |
1510 | O << Indent << "BLEND " ; |
1511 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1512 | O << " =" ; |
1513 | if (getNumIncomingValues() == 1) { |
1514 | // Not a User of any mask: not really blending, this is a |
1515 | // single-predecessor phi. |
1516 | O << " " ; |
1517 | getIncomingValue(Idx: 0)->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1518 | } else { |
1519 | for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { |
1520 | O << " " ; |
1521 | getIncomingValue(Idx: I)->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1522 | if (I == 0) |
1523 | continue; |
1524 | O << "/" ; |
1525 | getMask(Idx: I)->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1526 | } |
1527 | } |
1528 | } |
1529 | #endif |
1530 | |
1531 | void VPReductionRecipe::execute(VPTransformState &State) { |
1532 | assert(!State.Instance && "Reduction being replicated." ); |
1533 | Value *PrevInChain = State.get(Def: getChainOp(), Part: 0, /*IsScalar*/ true); |
1534 | RecurKind Kind = RdxDesc.getRecurrenceKind(); |
1535 | // Propagate the fast-math flags carried by the underlying instruction. |
1536 | IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); |
1537 | State.Builder.setFastMathFlags(RdxDesc.getFastMathFlags()); |
1538 | for (unsigned Part = 0; Part < State.UF; ++Part) { |
1539 | Value *NewVecOp = State.get(Def: getVecOp(), Part); |
1540 | if (VPValue *Cond = getCondOp()) { |
1541 | Value *NewCond = State.get(Def: Cond, Part, IsScalar: State.VF.isScalar()); |
1542 | VectorType *VecTy = dyn_cast<VectorType>(Val: NewVecOp->getType()); |
1543 | Type *ElementTy = VecTy ? VecTy->getElementType() : NewVecOp->getType(); |
1544 | Value *Iden = RdxDesc.getRecurrenceIdentity(K: Kind, Tp: ElementTy, |
1545 | FMF: RdxDesc.getFastMathFlags()); |
1546 | if (State.VF.isVector()) { |
1547 | Iden = State.Builder.CreateVectorSplat(EC: VecTy->getElementCount(), V: Iden); |
1548 | } |
1549 | |
1550 | Value *Select = State.Builder.CreateSelect(C: NewCond, True: NewVecOp, False: Iden); |
1551 | NewVecOp = Select; |
1552 | } |
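    // E.g., for a masked integer add reduction with VF = 4 (a sketch), Iden is
    // zero and the select is
    //   %sel = select <4 x i1> %cond, <4 x i32> %vec, <4 x i32> zeroinitializer
    // so masked-out lanes contribute the identity and leave the result
    // unchanged.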
1553 | Value *NewRed; |
1554 | Value *NextInChain; |
1555 | if (IsOrdered) { |
1556 | if (State.VF.isVector()) |
1557 | NewRed = createOrderedReduction(B&: State.Builder, Desc: RdxDesc, Src: NewVecOp, |
1558 | Start: PrevInChain); |
1559 | else |
1560 | NewRed = State.Builder.CreateBinOp( |
1561 | Opc: (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), LHS: PrevInChain, |
1562 | RHS: NewVecOp); |
1563 | PrevInChain = NewRed; |
1564 | } else { |
1565 | PrevInChain = State.get(Def: getChainOp(), Part, /*IsScalar*/ true); |
1566 | NewRed = createTargetReduction(B&: State.Builder, Desc: RdxDesc, Src: NewVecOp); |
1567 | } |
1568 | if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind)) { |
1569 | NextInChain = createMinMaxOp(Builder&: State.Builder, RK: RdxDesc.getRecurrenceKind(), |
1570 | Left: NewRed, Right: PrevInChain); |
1571 | } else if (IsOrdered) |
1572 | NextInChain = NewRed; |
1573 | else |
1574 | NextInChain = State.Builder.CreateBinOp( |
1575 | Opc: (Instruction::BinaryOps)RdxDesc.getOpcode(Kind), LHS: NewRed, RHS: PrevInChain); |
1576 | State.set(Def: this, V: NextInChain, Part, /*IsScalar*/ true); |
1577 | } |
1578 | } |
1579 | |
1580 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1581 | void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, |
1582 | VPSlotTracker &SlotTracker) const { |
1583 | O << Indent << "REDUCE " ; |
1584 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1585 | O << " = " ; |
1586 | getChainOp()->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1587 | O << " +" ; |
1588 | if (isa<FPMathOperator>(Val: getUnderlyingInstr())) |
1589 | O << getUnderlyingInstr()->getFastMathFlags(); |
1590 | O << " reduce." << Instruction::getOpcodeName(Opcode: RdxDesc.getOpcode()) << " (" ; |
1591 | getVecOp()->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1592 | if (getCondOp()) { |
1593 | O << ", " ; |
1594 | getCondOp()->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1595 | } |
1596 | O << ")" ; |
1597 | if (RdxDesc.IntermediateStore) |
1598 | O << " (with final reduction value stored in invariant address sank " |
1599 | "outside of loop)" ; |
1600 | } |
1601 | #endif |
1602 | |
1603 | bool VPReplicateRecipe::shouldPack() const { |
1604 | // Find if the recipe is used by a widened recipe via an intervening |
1605 | // VPPredInstPHIRecipe. In this case, also pack the scalar values in a vector. |
1606 | return any_of(Range: users(), P: [](const VPUser *U) { |
1607 | if (auto *PredR = dyn_cast<VPPredInstPHIRecipe>(Val: U)) |
1608 | return any_of(Range: PredR->users(), P: [PredR](const VPUser *U) { |
1609 | return !U->usesScalars(Op: PredR); |
1610 | }); |
1611 | return false; |
1612 | }); |
1613 | } |
1614 | |
1615 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1616 | void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, |
1617 | VPSlotTracker &SlotTracker) const { |
1618 | O << Indent << (IsUniform ? "CLONE " : "REPLICATE " ); |
1619 | |
1620 | if (!getUnderlyingInstr()->getType()->isVoidTy()) { |
1621 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1622 | O << " = " ; |
1623 | } |
1624 | if (auto *CB = dyn_cast<CallBase>(Val: getUnderlyingInstr())) { |
1625 | O << "call" ; |
1626 | printFlags(O); |
1627 | O << "@" << CB->getCalledFunction()->getName() << "(" ; |
1628 | interleaveComma(c: make_range(x: op_begin(), y: op_begin() + (getNumOperands() - 1)), |
1629 | os&: O, each_fn: [&O, &SlotTracker](VPValue *Op) { |
1630 | Op->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1631 | }); |
1632 | O << ")" ; |
1633 | } else { |
1634 | O << Instruction::getOpcodeName(Opcode: getUnderlyingInstr()->getOpcode()); |
1635 | printFlags(O); |
1636 | printOperands(O, SlotTracker); |
1637 | } |
1638 | |
1639 | if (shouldPack()) |
1640 | O << " (S->V)" ; |
1641 | } |
1642 | #endif |
1643 | |
1644 | /// Checks if \p C is uniform across all VFs and UFs. It is considered as such |
1645 | /// if it is either defined outside the vector region or its operand is known to |
1646 | /// be uniform across all VFs and UFs (e.g. VPDerivedIV or VPCanonicalIVPHI). |
1647 | /// TODO: Uniformity should be associated with a VPValue and there should be a |
1648 | /// generic way to check. |
1649 | static bool isUniformAcrossVFsAndUFs(VPScalarCastRecipe *C) { |
1650 | return C->isDefinedOutsideVectorRegions() || |
1651 | isa<VPDerivedIVRecipe>(Val: C->getOperand(N: 0)) || |
1652 | isa<VPCanonicalIVPHIRecipe>(Val: C->getOperand(N: 0)); |
1653 | } |
1654 | |
Value *VPScalarCastRecipe::generate(VPTransformState &State, unsigned Part) {
1656 | assert(vputils::onlyFirstLaneUsed(this) && |
1657 | "Codegen only implemented for first lane." ); |
1658 | switch (Opcode) { |
1659 | case Instruction::SExt: |
1660 | case Instruction::ZExt: |
1661 | case Instruction::Trunc: { |
1662 | // Note: SExt/ZExt not used yet. |
1663 | Value *Op = State.get(Def: getOperand(N: 0), Instance: VPIteration(Part, 0)); |
1664 | return State.Builder.CreateCast(Op: Instruction::CastOps(Opcode), V: Op, DestTy: ResultTy); |
1665 | } |
1666 | default: |
1667 | llvm_unreachable("opcode not implemented yet" ); |
1668 | } |
1669 | } |
1670 | |
void VPScalarCastRecipe::execute(VPTransformState &State) {
1672 | bool IsUniformAcrossVFsAndUFs = isUniformAcrossVFsAndUFs(C: this); |
1673 | for (unsigned Part = 0; Part != State.UF; ++Part) { |
1674 | Value *Res; |
    // Only generate a single instance if the recipe is uniform across UFs and
    // VFs.
1677 | if (Part > 0 && IsUniformAcrossVFsAndUFs) |
1678 | Res = State.get(Def: this, Instance: VPIteration(0, 0)); |
1679 | else |
1680 | Res = generate(State, Part); |
1681 | State.set(Def: this, V: Res, Instance: VPIteration(Part, 0)); |
1682 | } |
1683 | } |
1684 | |
1685 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
void VPScalarCastRecipe::print(raw_ostream &O, const Twine &Indent,
1687 | VPSlotTracker &SlotTracker) const { |
1688 | O << Indent << "SCALAR-CAST " ; |
1689 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1690 | O << " = " << Instruction::getOpcodeName(Opcode) << " " ; |
1691 | printOperands(O, SlotTracker); |
1692 | O << " to " << *ResultTy; |
1693 | } |
1694 | #endif |
1695 | |
1696 | void VPBranchOnMaskRecipe::execute(VPTransformState &State) { |
1697 | assert(State.Instance && "Branch on Mask works only on single instance." ); |
1698 | |
1699 | unsigned Part = State.Instance->Part; |
1700 | unsigned Lane = State.Instance->Lane.getKnownLane(); |
1701 | |
1702 | Value *ConditionBit = nullptr; |
1703 | VPValue *BlockInMask = getMask(); |
1704 | if (BlockInMask) { |
1705 | ConditionBit = State.get(Def: BlockInMask, Part); |
1706 | if (ConditionBit->getType()->isVectorTy()) |
1707 | ConditionBit = State.Builder.CreateExtractElement( |
1708 | Vec: ConditionBit, Idx: State.Builder.getInt32(C: Lane)); |
  } else // Block in mask is all-ones.
1710 | ConditionBit = State.Builder.getTrue(); |
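  // Taken together, the extract above (assuming a vector mask) and the branch
  // created below amount to the following sketch (hypothetical names) for
  // lane 2:
  //   %cond = extractelement <4 x i1> %mask, i32 2
  //   br i1 %cond, label %pred.if, label %pred.continue
  // where both destinations are filled in later.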
1711 | |
1712 | // Replace the temporary unreachable terminator with a new conditional branch, |
1713 | // whose two destinations will be set later when they are created. |
1714 | auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); |
1715 | assert(isa<UnreachableInst>(CurrentTerminator) && |
1716 | "Expected to replace unreachable terminator with conditional branch." ); |
1717 | auto *CondBr = BranchInst::Create(IfTrue: State.CFG.PrevBB, IfFalse: nullptr, Cond: ConditionBit); |
1718 | CondBr->setSuccessor(idx: 0, NewSucc: nullptr); |
1719 | ReplaceInstWithInst(From: CurrentTerminator, To: CondBr); |
1720 | } |
1721 | |
1722 | void VPPredInstPHIRecipe::execute(VPTransformState &State) { |
1723 | assert(State.Instance && "Predicated instruction PHI works per instance." ); |
1724 | Instruction *ScalarPredInst = |
1725 | cast<Instruction>(Val: State.get(Def: getOperand(N: 0), Instance: *State.Instance)); |
1726 | BasicBlock *PredicatedBB = ScalarPredInst->getParent(); |
1727 | BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor(); |
1728 | assert(PredicatingBB && "Predicated block has no single predecessor." ); |
1729 | assert(isa<VPReplicateRecipe>(getOperand(0)) && |
1730 | "operand must be VPReplicateRecipe" ); |
1731 | |
1732 | // By current pack/unpack logic we need to generate only a single phi node: if |
1733 | // a vector value for the predicated instruction exists at this point it means |
1734 | // the instruction has vector users only, and a phi for the vector value is |
1735 | // needed. In this case the recipe of the predicated instruction is marked to |
1736 | // also do that packing, thereby "hoisting" the insert-element sequence. |
1737 | // Otherwise, a phi node for the scalar value is needed. |
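  // Sketch of the vector case (illustrative names and types): if %iei is the
  // insert-element emitted in the predicated block, the phi created below is
  //   %vphi = phi <4 x i32> [ %vec.prev, %predicating ], [ %iei, %predicated ]
  // so subsequent predicated lanes keep inserting into the correct vector.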
1738 | unsigned Part = State.Instance->Part; |
1739 | if (State.hasVectorValue(Def: getOperand(N: 0), Part)) { |
1740 | Value *VectorValue = State.get(Def: getOperand(N: 0), Part); |
1741 | InsertElementInst *IEI = cast<InsertElementInst>(Val: VectorValue); |
1742 | PHINode *VPhi = State.Builder.CreatePHI(Ty: IEI->getType(), NumReservedValues: 2); |
1743 | VPhi->addIncoming(V: IEI->getOperand(i_nocapture: 0), BB: PredicatingBB); // Unmodified vector. |
1744 | VPhi->addIncoming(V: IEI, BB: PredicatedBB); // New vector with inserted element. |
1745 | if (State.hasVectorValue(Def: this, Part)) |
1746 | State.reset(Def: this, V: VPhi, Part); |
1747 | else |
1748 | State.set(Def: this, V: VPhi, Part); |
1749 | // NOTE: Currently we need to update the value of the operand, so the next |
1750 | // predicated iteration inserts its generated value in the correct vector. |
1751 | State.reset(Def: getOperand(N: 0), V: VPhi, Part); |
1752 | } else { |
1753 | Type *PredInstType = getOperand(N: 0)->getUnderlyingValue()->getType(); |
1754 | PHINode *Phi = State.Builder.CreatePHI(Ty: PredInstType, NumReservedValues: 2); |
1755 | Phi->addIncoming(V: PoisonValue::get(T: ScalarPredInst->getType()), |
1756 | BB: PredicatingBB); |
1757 | Phi->addIncoming(V: ScalarPredInst, BB: PredicatedBB); |
1758 | if (State.hasScalarValue(Def: this, Instance: *State.Instance)) |
1759 | State.reset(Def: this, V: Phi, Instance: *State.Instance); |
1760 | else |
1761 | State.set(Def: this, V: Phi, Instance: *State.Instance); |
1762 | // NOTE: Currently we need to update the value of the operand, so the next |
1763 | // predicated iteration inserts its generated value in the correct vector. |
1764 | State.reset(Def: getOperand(N: 0), V: Phi, Instance: *State.Instance); |
1765 | } |
1766 | } |
1767 | |
1768 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1769 | void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
1770 | VPSlotTracker &SlotTracker) const { |
1771 | O << Indent << "PHI-PREDICATED-INSTRUCTION " ; |
1772 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1773 | O << " = " ; |
1774 | printOperands(O, SlotTracker); |
1775 | } |
1776 | |
1777 | void VPWidenLoadRecipe::print(raw_ostream &O, const Twine &Indent, |
1778 | VPSlotTracker &SlotTracker) const { |
1779 | O << Indent << "WIDEN " ; |
1780 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1781 | O << " = load " ; |
1782 | printOperands(O, SlotTracker); |
1783 | } |
1784 | |
1785 | void VPWidenLoadEVLRecipe::print(raw_ostream &O, const Twine &Indent, |
1786 | VPSlotTracker &SlotTracker) const { |
1787 | O << Indent << "WIDEN " ; |
1788 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1789 | O << " = vp.load " ; |
1790 | printOperands(O, SlotTracker); |
1791 | } |
1792 | |
1793 | void VPWidenStoreRecipe::print(raw_ostream &O, const Twine &Indent, |
1794 | VPSlotTracker &SlotTracker) const { |
1795 | O << Indent << "WIDEN store " ; |
1796 | printOperands(O, SlotTracker); |
1797 | } |
1798 | |
1799 | void VPWidenStoreEVLRecipe::print(raw_ostream &O, const Twine &Indent, |
1800 | VPSlotTracker &SlotTracker) const { |
1801 | O << Indent << "WIDEN vp.store " ; |
1802 | printOperands(O, SlotTracker); |
1803 | } |
1804 | #endif |
1805 | |
1806 | void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { |
1807 | Value *Start = getStartValue()->getLiveInIRValue(); |
1808 | PHINode *EntryPart = PHINode::Create(Ty: Start->getType(), NumReservedValues: 2, NameStr: "index" ); |
1809 | EntryPart->insertBefore(InsertPos: State.CFG.PrevBB->getFirstInsertionPt()); |
1810 | |
1811 | BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(R: this); |
1812 | EntryPart->addIncoming(V: Start, BB: VectorPH); |
1813 | EntryPart->setDebugLoc(getDebugLoc()); |
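  // Sketch of the phi built above (illustrative; the backedge incoming is
  // added later, outside this recipe):
  //   %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]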
1814 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) |
1815 | State.set(Def: this, V: EntryPart, Part, /*IsScalar*/ true); |
1816 | } |
1817 | |
1818 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1819 | void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
1820 | VPSlotTracker &SlotTracker) const { |
1821 | O << Indent << "EMIT " ; |
1822 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1823 | O << " = CANONICAL-INDUCTION " ; |
1824 | printOperands(O, SlotTracker); |
1825 | } |
1826 | #endif |
1827 | |
1828 | bool VPCanonicalIVPHIRecipe::isCanonical( |
1829 | InductionDescriptor::InductionKind Kind, VPValue *Start, |
1830 | VPValue *Step) const { |
1831 | // Must be an integer induction. |
1832 | if (Kind != InductionDescriptor::IK_IntInduction) |
1833 | return false; |
1834 | // Start must match the start value of this canonical induction. |
1835 | if (Start != getStartValue()) |
1836 | return false; |
1837 | |
1838 | // If the step is defined by a recipe, it is not a ConstantInt. |
1839 | if (Step->getDefiningRecipe()) |
1840 | return false; |
1841 | |
1842 | ConstantInt *StepC = dyn_cast<ConstantInt>(Val: Step->getLiveInIRValue()); |
1843 | return StepC && StepC->isOne(); |
1844 | } |
1845 | |
1846 | bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { |
1847 | return IsScalarAfterVectorization && |
1848 | (!IsScalable || vputils::onlyFirstLaneUsed(Def: this)); |
1849 | } |
1850 | |
1851 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1852 | void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent, |
1853 | VPSlotTracker &SlotTracker) const { |
1854 | O << Indent << "EMIT " ; |
1855 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1856 | O << " = WIDEN-POINTER-INDUCTION " ; |
1857 | getStartValue()->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1858 | O << ", " << *IndDesc.getStep(); |
1859 | } |
1860 | #endif |
1861 | |
1862 | void VPExpandSCEVRecipe::execute(VPTransformState &State) { |
1863 | assert(!State.Instance && "cannot be used in per-lane" ); |
1864 | const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); |
1865 | SCEVExpander Exp(SE, DL, "induction" ); |
1866 | |
1867 | Value *Res = Exp.expandCodeFor(SH: Expr, Ty: Expr->getType(), |
1868 | I: &*State.Builder.GetInsertPoint()); |
1869 | assert(!State.ExpandedSCEVs.contains(Expr) && |
1870 | "Same SCEV expanded multiple times" ); |
1871 | State.ExpandedSCEVs[Expr] = Res; |
1872 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) |
1873 | State.set(Def: this, V: Res, Instance: {Part, 0}); |
1874 | } |
1875 | |
1876 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1877 | void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent, |
1878 | VPSlotTracker &SlotTracker) const { |
1879 | O << Indent << "EMIT " ; |
1880 | getVPSingleValue()->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1881 | O << " = EXPAND SCEV " << *Expr; |
1882 | } |
1883 | #endif |
1884 | |
1885 | void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { |
1886 | Value *CanonicalIV = State.get(Def: getOperand(N: 0), Part: 0, /*IsScalar*/ true); |
1887 | Type *STy = CanonicalIV->getType(); |
1888 | IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); |
1889 | ElementCount VF = State.VF; |
1890 | Value *VStart = VF.isScalar() |
1891 | ? CanonicalIV |
1892 | : Builder.CreateVectorSplat(EC: VF, V: CanonicalIV, Name: "broadcast" ); |
1893 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { |
1894 | Value *VStep = createStepForVF(B&: Builder, Ty: STy, VF, Step: Part); |
1895 | if (VF.isVector()) { |
1896 | VStep = Builder.CreateVectorSplat(EC: VF, V: VStep); |
1897 | VStep = |
1898 | Builder.CreateAdd(LHS: VStep, RHS: Builder.CreateStepVector(DstType: VStep->getType())); |
1899 | } |
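    // E.g., for fixed-width VF = 4 (a sketch): part 0 below becomes
    // <%iv, %iv+1, %iv+2, %iv+3> and part 1 adds a splat of 4 on top, giving
    // <%iv+4, %iv+5, %iv+6, %iv+7>.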
1900 | Value *CanonicalVectorIV = Builder.CreateAdd(LHS: VStart, RHS: VStep, Name: "vec.iv" ); |
1901 | State.set(Def: this, V: CanonicalVectorIV, Part); |
1902 | } |
1903 | } |
1904 | |
1905 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1906 | void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent, |
1907 | VPSlotTracker &SlotTracker) const { |
1908 | O << Indent << "EMIT " ; |
1909 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1910 | O << " = WIDEN-CANONICAL-INDUCTION " ; |
1911 | printOperands(O, SlotTracker); |
1912 | } |
1913 | #endif |
1914 | |
1915 | void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { |
1916 | auto &Builder = State.Builder; |
1917 | // Create a vector from the initial value. |
1918 | auto *VectorInit = getStartValue()->getLiveInIRValue(); |
1919 | |
1920 | Type *VecTy = State.VF.isScalar() |
1921 | ? VectorInit->getType() |
1922 | : VectorType::get(ElementType: VectorInit->getType(), EC: State.VF); |
1923 | |
1924 | BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(R: this); |
1925 | if (State.VF.isVector()) { |
1926 | auto *IdxTy = Builder.getInt32Ty(); |
1927 | auto *One = ConstantInt::get(Ty: IdxTy, V: 1); |
1928 | IRBuilder<>::InsertPointGuard Guard(Builder); |
1929 | Builder.SetInsertPoint(VectorPH->getTerminator()); |
1930 | auto *RuntimeVF = getRuntimeVF(B&: Builder, Ty: IdxTy, VF: State.VF); |
1931 | auto *LastIdx = Builder.CreateSub(LHS: RuntimeVF, RHS: One); |
1932 | VectorInit = Builder.CreateInsertElement( |
1933 | Vec: PoisonValue::get(T: VecTy), NewElt: VectorInit, Idx: LastIdx, Name: "vector.recur.init" ); |
1934 | } |
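  // Sketch for VF = 4 (assuming an i32 recurrence): LastIdx is 3 and the
  // initial vector is
  //   %vector.recur.init = insertelement <4 x i32> poison, i32 %init, i32 3
  // i.e. the scalar start value occupies only the last lane.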
1935 | |
1936 | // Create a phi node for the new recurrence. |
1937 | PHINode *EntryPart = PHINode::Create(Ty: VecTy, NumReservedValues: 2, NameStr: "vector.recur" ); |
1938 | EntryPart->insertBefore(InsertPos: State.CFG.PrevBB->getFirstInsertionPt()); |
1939 | EntryPart->addIncoming(V: VectorInit, BB: VectorPH); |
1940 | State.set(Def: this, V: EntryPart, Part: 0); |
1941 | } |
1942 | |
1943 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1944 | void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent, |
1945 | VPSlotTracker &SlotTracker) const { |
1946 | O << Indent << "FIRST-ORDER-RECURRENCE-PHI " ; |
1947 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
1948 | O << " = phi " ; |
1949 | printOperands(O, SlotTracker); |
1950 | } |
1951 | #endif |
1952 | |
1953 | void VPReductionPHIRecipe::execute(VPTransformState &State) { |
1954 | auto &Builder = State.Builder; |
1955 | |
  // Reductions do not have to start at zero. They can start with any
  // loop-invariant value.
1958 | VPValue *StartVPV = getStartValue(); |
1959 | Value *StartV = StartVPV->getLiveInIRValue(); |
1960 | |
1961 | // In order to support recurrences we need to be able to vectorize Phi nodes. |
1962 | // Phi nodes have cycles, so we need to vectorize them in two stages. This is |
1963 | // stage #1: We create a new vector PHI node with no incoming edges. We'll use |
1964 | // this value when we vectorize all of the instructions that use the PHI. |
1965 | bool ScalarPHI = State.VF.isScalar() || IsInLoop; |
1966 | Type *VecTy = ScalarPHI ? StartV->getType() |
1967 | : VectorType::get(ElementType: StartV->getType(), EC: State.VF); |
1968 | |
  BasicBlock *HeaderBB = State.CFG.PrevBB;
1970 | assert(State.CurrentVectorLoop->getHeader() == HeaderBB && |
1971 | "recipe must be in the vector loop header" ); |
1972 | unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF; |
1973 | for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { |
1974 | Instruction *EntryPart = PHINode::Create(Ty: VecTy, NumReservedValues: 2, NameStr: "vec.phi" ); |
1975 | EntryPart->insertBefore(InsertPos: HeaderBB->getFirstInsertionPt()); |
1976 | State.set(Def: this, V: EntryPart, Part, IsScalar: IsInLoop); |
1977 | } |
1978 | |
1979 | BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(R: this); |
1980 | |
1981 | Value *Iden = nullptr; |
1982 | RecurKind RK = RdxDesc.getRecurrenceKind(); |
1983 | if (RecurrenceDescriptor::isMinMaxRecurrenceKind(Kind: RK) || |
1984 | RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind: RK)) { |
1985 | // MinMax and AnyOf reductions have the start value as their identity. |
1986 | if (ScalarPHI) { |
1987 | Iden = StartV; |
1988 | } else { |
1989 | IRBuilderBase::InsertPointGuard IPBuilder(Builder); |
1990 | Builder.SetInsertPoint(VectorPH->getTerminator()); |
1991 | StartV = Iden = |
1992 | Builder.CreateVectorSplat(EC: State.VF, V: StartV, Name: "minmax.ident" ); |
1993 | } |
1994 | } else { |
1995 | Iden = RdxDesc.getRecurrenceIdentity(K: RK, Tp: VecTy->getScalarType(), |
1996 | FMF: RdxDesc.getFastMathFlags()); |
1997 | |
1998 | if (!ScalarPHI) { |
1999 | Iden = Builder.CreateVectorSplat(EC: State.VF, V: Iden); |
2000 | IRBuilderBase::InsertPointGuard IPBuilder(Builder); |
2001 | Builder.SetInsertPoint(VectorPH->getTerminator()); |
2002 | Constant *Zero = Builder.getInt32(C: 0); |
2003 | StartV = Builder.CreateInsertElement(Vec: Iden, NewElt: StartV, Idx: Zero); |
2004 | } |
2005 | } |
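  // Sketch for a plain integer add reduction with VF = 4 and scalar start %s:
  // Iden is <4 x i32> zeroinitializer and StartV becomes <%s, 0, 0, 0>, so
  // only lane 0 of the first part carries the start value.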
2006 | |
2007 | for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { |
2008 | Value *EntryPart = State.get(Def: this, Part, IsScalar: IsInLoop); |
2009 | // Make sure to add the reduction start value only to the |
2010 | // first unroll part. |
2011 | Value *StartVal = (Part == 0) ? StartV : Iden; |
2012 | cast<PHINode>(Val: EntryPart)->addIncoming(V: StartVal, BB: VectorPH); |
2013 | } |
2014 | } |
2015 | |
2016 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2017 | void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
2018 | VPSlotTracker &SlotTracker) const { |
2019 | O << Indent << "WIDEN-REDUCTION-PHI " ; |
2020 | |
2021 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
2022 | O << " = phi " ; |
2023 | printOperands(O, SlotTracker); |
2024 | } |
2025 | #endif |
2026 | |
2027 | void VPWidenPHIRecipe::execute(VPTransformState &State) { |
2028 | assert(EnableVPlanNativePath && |
2029 | "Non-native vplans are not expected to have VPWidenPHIRecipes." ); |
2030 | |
2031 | Value *Op0 = State.get(Def: getOperand(N: 0), Part: 0); |
2032 | Type *VecTy = Op0->getType(); |
2033 | Value *VecPhi = State.Builder.CreatePHI(Ty: VecTy, NumReservedValues: 2, Name: "vec.phi" ); |
2034 | State.set(Def: this, V: VecPhi, Part: 0); |
2035 | } |
2036 | |
2037 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2038 | void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
2039 | VPSlotTracker &SlotTracker) const { |
2040 | O << Indent << "WIDEN-PHI " ; |
2041 | |
2042 | auto *OriginalPhi = cast<PHINode>(Val: getUnderlyingValue()); |
  // Unless all incoming values are modeled in VPlan, print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant
  // incoming values as VPValues.
2047 | if (getNumOperands() != OriginalPhi->getNumOperands()) { |
2048 | O << VPlanIngredient(OriginalPhi); |
2049 | return; |
2050 | } |
2051 | |
2052 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
2053 | O << " = phi " ; |
2054 | printOperands(O, SlotTracker); |
2055 | } |
2056 | #endif |
2057 | |
2058 | // TODO: It would be good to use the existing VPWidenPHIRecipe instead and |
2059 | // remove VPActiveLaneMaskPHIRecipe. |
2060 | void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { |
2061 | BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(R: this); |
2062 | for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { |
2063 | Value *StartMask = State.get(Def: getOperand(N: 0), Part); |
2064 | PHINode *EntryPart = |
2065 | State.Builder.CreatePHI(Ty: StartMask->getType(), NumReservedValues: 2, Name: "active.lane.mask" ); |
2066 | EntryPart->addIncoming(V: StartMask, BB: VectorPH); |
2067 | EntryPart->setDebugLoc(getDebugLoc()); |
2068 | State.set(Def: this, V: EntryPart, Part); |
2069 | } |
2070 | } |
2071 | |
2072 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2073 | void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
2074 | VPSlotTracker &SlotTracker) const { |
2075 | O << Indent << "ACTIVE-LANE-MASK-PHI " ; |
2076 | |
2077 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
2078 | O << " = phi " ; |
2079 | printOperands(O, SlotTracker); |
2080 | } |
2081 | #endif |
2082 | |
2083 | void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { |
2084 | BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(R: this); |
2085 | assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization." ); |
2086 | Value *Start = State.get(Def: getOperand(N: 0), Instance: VPIteration(0, 0)); |
2087 | PHINode *EntryPart = |
2088 | State.Builder.CreatePHI(Ty: Start->getType(), NumReservedValues: 2, Name: "evl.based.iv" ); |
2089 | EntryPart->addIncoming(V: Start, BB: VectorPH); |
2090 | EntryPart->setDebugLoc(getDebugLoc()); |
2091 | State.set(Def: this, V: EntryPart, Part: 0, /*IsScalar=*/true); |
2092 | } |
2093 | |
2094 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
2095 | void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, |
2096 | VPSlotTracker &SlotTracker) const { |
2097 | O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI " ; |
2098 | |
2099 | printAsOperand(OS&: O, Tracker&: SlotTracker); |
2100 | O << " = phi " ; |
2101 | printOperands(O, SlotTracker); |
2102 | } |
2103 | #endif |
2104 | |