1 | //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This is the LLVM vectorization plan. It represents a candidate for |
11 | /// vectorization, allowing to plan and optimize how to vectorize a given loop |
12 | /// before generating LLVM-IR. |
13 | /// The vectorizer uses vectorization plans to estimate the costs of potential |
14 | /// candidates and if profitable to execute the desired plan, generating vector |
15 | /// LLVM-IR code. |
16 | /// |
17 | //===----------------------------------------------------------------------===// |
18 | |
19 | #include "VPlan.h" |
20 | #include "VPlanCFG.h" |
21 | #include "VPlanDominatorTree.h" |
22 | #include "VPlanPatternMatch.h" |
23 | #include "llvm/ADT/PostOrderIterator.h" |
24 | #include "llvm/ADT/STLExtras.h" |
25 | #include "llvm/ADT/SmallVector.h" |
26 | #include "llvm/ADT/StringExtras.h" |
27 | #include "llvm/ADT/Twine.h" |
28 | #include "llvm/Analysis/LoopInfo.h" |
29 | #include "llvm/IR/BasicBlock.h" |
30 | #include "llvm/IR/CFG.h" |
31 | #include "llvm/IR/IRBuilder.h" |
32 | #include "llvm/IR/Instruction.h" |
33 | #include "llvm/IR/Instructions.h" |
34 | #include "llvm/IR/Type.h" |
35 | #include "llvm/IR/Value.h" |
36 | #include "llvm/Support/Casting.h" |
37 | #include "llvm/Support/CommandLine.h" |
38 | #include "llvm/Support/Debug.h" |
39 | #include "llvm/Support/GenericDomTreeConstruction.h" |
40 | #include "llvm/Support/GraphWriter.h" |
41 | #include "llvm/Support/raw_ostream.h" |
42 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
43 | #include "llvm/Transforms/Utils/LoopVersioning.h" |
44 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
45 | #include <cassert> |
46 | #include <string> |
47 | #include <vector> |
48 | |
49 | using namespace llvm; |
50 | using namespace llvm::VPlanPatternMatch; |
51 | |
52 | namespace llvm { |
53 | extern cl::opt<bool> EnableVPlanNativePath; |
54 | } |
55 | |
56 | #define DEBUG_TYPE "vplan" |
57 | |
58 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
59 | raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) { |
60 | const VPInstruction *Instr = dyn_cast<VPInstruction>(Val: &V); |
61 | VPSlotTracker SlotTracker( |
62 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); |
63 | V.print(OS, Tracker&: SlotTracker); |
64 | return OS; |
65 | } |
66 | #endif |
67 | |
68 | Value *VPLane::getAsRuntimeExpr(IRBuilderBase &Builder, |
69 | const ElementCount &VF) const { |
70 | switch (LaneKind) { |
71 | case VPLane::Kind::ScalableLast: |
72 | // Lane = RuntimeVF - VF.getKnownMinValue() + Lane |
73 | return Builder.CreateSub(LHS: getRuntimeVF(B&: Builder, Ty: Builder.getInt32Ty(), VF), |
74 | RHS: Builder.getInt32(C: VF.getKnownMinValue() - Lane)); |
75 | case VPLane::Kind::First: |
76 | return Builder.getInt32(C: Lane); |
77 | } |
78 | llvm_unreachable("Unknown lane kind" ); |
79 | } |
80 | |
81 | VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def) |
82 | : SubclassID(SC), UnderlyingVal(UV), Def(Def) { |
83 | if (Def) |
84 | Def->addDefinedValue(V: this); |
85 | } |
86 | |
87 | VPValue::~VPValue() { |
88 | assert(Users.empty() && "trying to delete a VPValue with remaining users" ); |
89 | if (Def) |
90 | Def->removeDefinedValue(V: this); |
91 | } |
92 | |
93 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
94 | void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const { |
95 | if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Val: Def)) |
96 | R->print(O&: OS, Indent: "" , SlotTracker); |
97 | else |
98 | printAsOperand(OS, Tracker&: SlotTracker); |
99 | } |
100 | |
101 | void VPValue::dump() const { |
102 | const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(Val: this->Def); |
103 | VPSlotTracker SlotTracker( |
104 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); |
105 | print(OS&: dbgs(), SlotTracker); |
106 | dbgs() << "\n" ; |
107 | } |
108 | |
109 | void VPDef::dump() const { |
110 | const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(Val: this); |
111 | VPSlotTracker SlotTracker( |
112 | (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr); |
113 | print(O&: dbgs(), Indent: "" , SlotTracker); |
114 | dbgs() << "\n" ; |
115 | } |
116 | #endif |
117 | |
118 | VPRecipeBase *VPValue::getDefiningRecipe() { |
119 | return cast_or_null<VPRecipeBase>(Val: Def); |
120 | } |
121 | |
122 | const VPRecipeBase *VPValue::getDefiningRecipe() const { |
123 | return cast_or_null<VPRecipeBase>(Val: Def); |
124 | } |
125 | |
126 | // Get the top-most entry block of \p Start. This is the entry block of the |
127 | // containing VPlan. This function is templated to support both const and non-const blocks |
128 | template <typename T> static T *getPlanEntry(T *Start) { |
129 | T *Next = Start; |
130 | T *Current = Start; |
131 | while ((Next = Next->getParent())) |
132 | Current = Next; |
133 | |
134 | SmallSetVector<T *, 8> WorkList; |
135 | WorkList.insert(Current); |
136 | |
137 | for (unsigned i = 0; i < WorkList.size(); i++) { |
138 | T *Current = WorkList[i]; |
139 | if (Current->getNumPredecessors() == 0) |
140 | return Current; |
141 | auto &Predecessors = Current->getPredecessors(); |
142 | WorkList.insert(Predecessors.begin(), Predecessors.end()); |
143 | } |
144 | |
145 | llvm_unreachable("VPlan without any entry node without predecessors" ); |
146 | } |
147 | |
148 | VPlan *VPBlockBase::getPlan() { return getPlanEntry(Start: this)->Plan; } |
149 | |
150 | const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(Start: this)->Plan; } |
151 | |
152 | /// \return the VPBasicBlock that is the entry of Block, possibly indirectly. |
153 | const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const { |
154 | const VPBlockBase *Block = this; |
155 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
156 | Block = Region->getEntry(); |
157 | return cast<VPBasicBlock>(Val: Block); |
158 | } |
159 | |
160 | VPBasicBlock *VPBlockBase::getEntryBasicBlock() { |
161 | VPBlockBase *Block = this; |
162 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
163 | Block = Region->getEntry(); |
164 | return cast<VPBasicBlock>(Val: Block); |
165 | } |
166 | |
167 | void VPBlockBase::setPlan(VPlan *ParentPlan) { |
168 | assert( |
169 | (ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && |
170 | "Can only set plan on its entry or preheader block." ); |
171 | Plan = ParentPlan; |
172 | } |
173 | |
174 | /// \return the VPBasicBlock that is the exit of Block, possibly indirectly. |
175 | const VPBasicBlock *VPBlockBase::getExitingBasicBlock() const { |
176 | const VPBlockBase *Block = this; |
177 | while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
178 | Block = Region->getExiting(); |
179 | return cast<VPBasicBlock>(Val: Block); |
180 | } |
181 | |
182 | VPBasicBlock *VPBlockBase::getExitingBasicBlock() { |
183 | VPBlockBase *Block = this; |
184 | while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
185 | Block = Region->getExiting(); |
186 | return cast<VPBasicBlock>(Val: Block); |
187 | } |
188 | |
189 | VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() { |
190 | if (!Successors.empty() || !Parent) |
191 | return this; |
192 | assert(Parent->getExiting() == this && |
193 | "Block w/o successors not the exiting block of its parent." ); |
194 | return Parent->getEnclosingBlockWithSuccessors(); |
195 | } |
196 | |
197 | VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() { |
198 | if (!Predecessors.empty() || !Parent) |
199 | return this; |
200 | assert(Parent->getEntry() == this && |
201 | "Block w/o predecessors not the entry of its parent." ); |
202 | return Parent->getEnclosingBlockWithPredecessors(); |
203 | } |
204 | |
205 | void VPBlockBase::deleteCFG(VPBlockBase *Entry) { |
206 | for (VPBlockBase *Block : to_vector(Range: vp_depth_first_shallow(G: Entry))) |
207 | delete Block; |
208 | } |
209 | |
210 | VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() { |
211 | iterator It = begin(); |
212 | while (It != end() && It->isPhi()) |
213 | It++; |
214 | return It; |
215 | } |
216 | |
217 | VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI, |
218 | DominatorTree *DT, IRBuilderBase &Builder, |
219 | InnerLoopVectorizer *ILV, VPlan *Plan, |
220 | LLVMContext &Ctx) |
221 | : VF(VF), UF(UF), LI(LI), DT(DT), Builder(Builder), ILV(ILV), Plan(Plan), |
222 | LVer(nullptr), |
223 | TypeAnalysis(Plan->getCanonicalIV()->getScalarType(), Ctx) {} |
224 | |
225 | Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) { |
226 | if (Def->isLiveIn()) |
227 | return Def->getLiveInIRValue(); |
228 | |
229 | if (hasScalarValue(Def, Instance)) { |
230 | return Data |
231 | .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)]; |
232 | } |
233 | |
234 | assert(hasVectorValue(Def, Instance.Part)); |
235 | auto *VecPart = Data.PerPartOutput[Def][Instance.Part]; |
236 | if (!VecPart->getType()->isVectorTy()) { |
237 | assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar" ); |
238 | return VecPart; |
239 | } |
240 | // TODO: Cache created scalar values. |
241 | Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF); |
242 | auto * = Builder.CreateExtractElement(Vec: VecPart, Idx: Lane); |
243 | // set(Def, Extract, Instance); |
244 | return Extract; |
245 | } |
246 | |
247 | Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) { |
248 | if (NeedsScalar) { |
249 | assert((VF.isScalar() || Def->isLiveIn() || |
250 | (hasScalarValue(Def, VPIteration(Part, 0)) && |
251 | Data.PerPartScalars[Def][Part].size() == 1)) && |
252 | "Trying to access a single scalar per part but has multiple scalars " |
253 | "per part." ); |
254 | return get(Def, Instance: VPIteration(Part, 0)); |
255 | } |
256 | |
257 | // If Values have been set for this Def return the one relevant for \p Part. |
258 | if (hasVectorValue(Def, Part)) |
259 | return Data.PerPartOutput[Def][Part]; |
260 | |
261 | auto GetBroadcastInstrs = [this, Def](Value *V) { |
262 | bool SafeToHoist = Def->isDefinedOutsideVectorRegions(); |
263 | if (VF.isScalar()) |
264 | return V; |
265 | // Place the code for broadcasting invariant variables in the new preheader. |
266 | IRBuilder<>::InsertPointGuard Guard(Builder); |
267 | if (SafeToHoist) { |
268 | BasicBlock * = CFG.VPBB2IRBB[cast<VPBasicBlock>( |
269 | Val: Plan->getVectorLoopRegion()->getSinglePredecessor())]; |
270 | if (LoopVectorPreHeader) |
271 | Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator()); |
272 | } |
273 | |
274 | // Place the code for broadcasting invariant variables in the new preheader. |
275 | // Broadcast the scalar into all locations in the vector. |
276 | Value *Shuf = Builder.CreateVectorSplat(EC: VF, V, Name: "broadcast" ); |
277 | |
278 | return Shuf; |
279 | }; |
280 | |
281 | if (!hasScalarValue(Def, Instance: {Part, 0})) { |
282 | assert(Def->isLiveIn() && "expected a live-in" ); |
283 | if (Part != 0) |
284 | return get(Def, Part: 0); |
285 | Value *IRV = Def->getLiveInIRValue(); |
286 | Value *B = GetBroadcastInstrs(IRV); |
287 | set(Def, V: B, Part); |
288 | return B; |
289 | } |
290 | |
291 | Value *ScalarValue = get(Def, Instance: {Part, 0}); |
292 | // If we aren't vectorizing, we can just copy the scalar map values over |
293 | // to the vector map. |
294 | if (VF.isScalar()) { |
295 | set(Def, V: ScalarValue, Part); |
296 | return ScalarValue; |
297 | } |
298 | |
299 | bool IsUniform = vputils::isUniformAfterVectorization(VPV: Def); |
300 | |
301 | unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue() - 1; |
302 | // Check if there is a scalar value for the selected lane. |
303 | if (!hasScalarValue(Def, Instance: {Part, LastLane})) { |
304 | // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and |
305 | // VPExpandSCEVRecipes can also be uniform. |
306 | assert((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe()) || |
307 | isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe()) || |
308 | isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe())) && |
309 | "unexpected recipe found to be invariant" ); |
310 | IsUniform = true; |
311 | LastLane = 0; |
312 | } |
313 | |
314 | auto *LastInst = cast<Instruction>(Val: get(Def, Instance: {Part, LastLane})); |
315 | // Set the insert point after the last scalarized instruction or after the |
316 | // last PHI, if LastInst is a PHI. This ensures the insertelement sequence |
317 | // will directly follow the scalar definitions. |
318 | auto OldIP = Builder.saveIP(); |
319 | auto NewIP = |
320 | isa<PHINode>(Val: LastInst) |
321 | ? BasicBlock::iterator(LastInst->getParent()->getFirstNonPHI()) |
322 | : std::next(x: BasicBlock::iterator(LastInst)); |
323 | Builder.SetInsertPoint(&*NewIP); |
324 | |
325 | // However, if we are vectorizing, we need to construct the vector values. |
326 | // If the value is known to be uniform after vectorization, we can just |
327 | // broadcast the scalar value corresponding to lane zero for each unroll |
328 | // iteration. Otherwise, we construct the vector values using |
329 | // insertelement instructions. Since the resulting vectors are stored in |
330 | // State, we will only generate the insertelements once. |
331 | Value *VectorValue = nullptr; |
332 | if (IsUniform) { |
333 | VectorValue = GetBroadcastInstrs(ScalarValue); |
334 | set(Def, V: VectorValue, Part); |
335 | } else { |
336 | // Initialize packing with insertelements to start from undef. |
337 | assert(!VF.isScalable() && "VF is assumed to be non scalable." ); |
338 | Value *Undef = PoisonValue::get(T: VectorType::get(ElementType: LastInst->getType(), EC: VF)); |
339 | set(Def, V: Undef, Part); |
340 | for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane) |
341 | packScalarIntoVectorValue(Def, Instance: {Part, Lane}); |
342 | VectorValue = get(Def, Part); |
343 | } |
344 | Builder.restoreIP(IP: OldIP); |
345 | return VectorValue; |
346 | } |
347 | |
348 | BasicBlock *VPTransformState::CFGState::(VPRecipeBase *R) { |
349 | VPRegionBlock *LoopRegion = R->getParent()->getEnclosingLoopRegion(); |
350 | return VPBB2IRBB[LoopRegion->getPreheaderVPBB()]; |
351 | } |
352 | |
353 | void VPTransformState::addNewMetadata(Instruction *To, |
354 | const Instruction *Orig) { |
355 | // If the loop was versioned with memchecks, add the corresponding no-alias |
356 | // metadata. |
357 | if (LVer && (isa<LoadInst>(Val: Orig) || isa<StoreInst>(Val: Orig))) |
358 | LVer->annotateInstWithNoAlias(VersionedInst: To, OrigInst: Orig); |
359 | } |
360 | |
361 | void VPTransformState::addMetadata(Value *To, Instruction *From) { |
362 | // No source instruction to transfer metadata from? |
363 | if (!From) |
364 | return; |
365 | |
366 | if (Instruction *ToI = dyn_cast<Instruction>(Val: To)) { |
367 | propagateMetadata(I: ToI, VL: From); |
368 | addNewMetadata(To: ToI, Orig: From); |
369 | } |
370 | } |
371 | |
372 | void VPTransformState::setDebugLocFrom(DebugLoc DL) { |
373 | const DILocation *DIL = DL; |
374 | // When a FSDiscriminator is enabled, we don't need to add the multiply |
375 | // factors to the discriminators. |
376 | if (DIL && |
377 | Builder.GetInsertBlock() |
378 | ->getParent() |
379 | ->shouldEmitDebugInfoForProfiling() && |
380 | !EnableFSDiscriminator) { |
381 | // FIXME: For scalable vectors, assume vscale=1. |
382 | auto NewDIL = |
383 | DIL->cloneByMultiplyingDuplicationFactor(DF: UF * VF.getKnownMinValue()); |
384 | if (NewDIL) |
385 | Builder.SetCurrentDebugLocation(*NewDIL); |
386 | else |
387 | LLVM_DEBUG(dbgs() << "Failed to create new discriminator: " |
388 | << DIL->getFilename() << " Line: " << DIL->getLine()); |
389 | } else |
390 | Builder.SetCurrentDebugLocation(DIL); |
391 | } |
392 | |
393 | void VPTransformState::packScalarIntoVectorValue(VPValue *Def, |
394 | const VPIteration &Instance) { |
395 | Value *ScalarInst = get(Def, Instance); |
396 | Value *VectorValue = get(Def, Part: Instance.Part); |
397 | VectorValue = Builder.CreateInsertElement( |
398 | Vec: VectorValue, NewElt: ScalarInst, Idx: Instance.Lane.getAsRuntimeExpr(Builder, VF)); |
399 | set(Def, V: VectorValue, Part: Instance.Part); |
400 | } |
401 | |
402 | BasicBlock * |
403 | VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) { |
404 | // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks. |
405 | // Pred stands for Predessor. Prev stands for Previous - last visited/created. |
406 | BasicBlock *PrevBB = CFG.PrevBB; |
407 | BasicBlock *NewBB = BasicBlock::Create(Context&: PrevBB->getContext(), Name: getName(), |
408 | Parent: PrevBB->getParent(), InsertBefore: CFG.ExitBB); |
409 | LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n'); |
410 | |
411 | // Hook up the new basic block to its predecessors. |
412 | for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) { |
413 | VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock(); |
414 | auto &PredVPSuccessors = PredVPBB->getHierarchicalSuccessors(); |
415 | BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB]; |
416 | |
417 | assert(PredBB && "Predecessor basic-block not found building successor." ); |
418 | auto *PredBBTerminator = PredBB->getTerminator(); |
419 | LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n'); |
420 | |
421 | auto *TermBr = dyn_cast<BranchInst>(Val: PredBBTerminator); |
422 | if (isa<UnreachableInst>(Val: PredBBTerminator)) { |
423 | assert(PredVPSuccessors.size() == 1 && |
424 | "Predecessor ending w/o branch must have single successor." ); |
425 | DebugLoc DL = PredBBTerminator->getDebugLoc(); |
426 | PredBBTerminator->eraseFromParent(); |
427 | auto *Br = BranchInst::Create(IfTrue: NewBB, InsertAtEnd: PredBB); |
428 | Br->setDebugLoc(DL); |
429 | } else if (TermBr && !TermBr->isConditional()) { |
430 | TermBr->setSuccessor(idx: 0, NewSucc: NewBB); |
431 | } else { |
432 | // Set each forward successor here when it is created, excluding |
433 | // backedges. A backward successor is set when the branch is created. |
434 | unsigned idx = PredVPSuccessors.front() == this ? 0 : 1; |
435 | assert(!TermBr->getSuccessor(idx) && |
436 | "Trying to reset an existing successor block." ); |
437 | TermBr->setSuccessor(idx, NewSucc: NewBB); |
438 | } |
439 | } |
440 | return NewBB; |
441 | } |
442 | |
443 | void VPBasicBlock::execute(VPTransformState *State) { |
444 | bool Replica = State->Instance && !State->Instance->isFirstIteration(); |
445 | VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB; |
446 | VPBlockBase *SingleHPred = nullptr; |
447 | BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible. |
448 | |
449 | auto IsLoopRegion = [](VPBlockBase *BB) { |
450 | auto *R = dyn_cast<VPRegionBlock>(Val: BB); |
451 | return R && !R->isReplicator(); |
452 | }; |
453 | |
454 | // 1. Create an IR basic block, or reuse the last one or ExitBB if possible. |
455 | if (getPlan()->getVectorLoopRegion()->getSingleSuccessor() == this) { |
456 | // ExitBB can be re-used for the exit block of the Plan. |
457 | NewBB = State->CFG.ExitBB; |
458 | State->CFG.PrevBB = NewBB; |
459 | State->Builder.SetInsertPoint(NewBB->getFirstNonPHI()); |
460 | |
461 | // Update the branch instruction in the predecessor to branch to ExitBB. |
462 | VPBlockBase *PredVPB = getSingleHierarchicalPredecessor(); |
463 | VPBasicBlock *ExitingVPBB = PredVPB->getExitingBasicBlock(); |
464 | assert(PredVPB->getSingleSuccessor() == this && |
465 | "predecessor must have the current block as only successor" ); |
466 | BasicBlock *ExitingBB = State->CFG.VPBB2IRBB[ExitingVPBB]; |
467 | // The Exit block of a loop is always set to be successor 0 of the Exiting |
468 | // block. |
469 | cast<BranchInst>(Val: ExitingBB->getTerminator())->setSuccessor(idx: 0, NewSucc: NewBB); |
470 | } else if (PrevVPBB && /* A */ |
471 | !((SingleHPred = getSingleHierarchicalPredecessor()) && |
472 | SingleHPred->getExitingBasicBlock() == PrevVPBB && |
473 | PrevVPBB->getSingleHierarchicalSuccessor() && |
474 | (SingleHPred->getParent() == getEnclosingLoopRegion() && |
475 | !IsLoopRegion(SingleHPred))) && /* B */ |
476 | !(Replica && getPredecessors().empty())) { /* C */ |
477 | // The last IR basic block is reused, as an optimization, in three cases: |
478 | // A. the first VPBB reuses the loop pre-header BB - when PrevVPBB is null; |
479 | // B. when the current VPBB has a single (hierarchical) predecessor which |
480 | // is PrevVPBB and the latter has a single (hierarchical) successor which |
481 | // both are in the same non-replicator region; and |
482 | // C. when the current VPBB is an entry of a region replica - where PrevVPBB |
483 | // is the exiting VPBB of this region from a previous instance, or the |
484 | // predecessor of this region. |
485 | |
486 | NewBB = createEmptyBasicBlock(CFG&: State->CFG); |
487 | State->Builder.SetInsertPoint(NewBB); |
488 | // Temporarily terminate with unreachable until CFG is rewired. |
489 | UnreachableInst *Terminator = State->Builder.CreateUnreachable(); |
490 | // Register NewBB in its loop. In innermost loops its the same for all |
491 | // BB's. |
492 | if (State->CurrentVectorLoop) |
493 | State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, LI&: *State->LI); |
494 | State->Builder.SetInsertPoint(Terminator); |
495 | State->CFG.PrevBB = NewBB; |
496 | } |
497 | |
498 | // 2. Fill the IR basic block with IR instructions. |
499 | LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName() |
500 | << " in BB:" << NewBB->getName() << '\n'); |
501 | |
502 | State->CFG.VPBB2IRBB[this] = NewBB; |
503 | State->CFG.PrevVPBB = this; |
504 | |
505 | for (VPRecipeBase &Recipe : Recipes) |
506 | Recipe.execute(State&: *State); |
507 | |
508 | LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB); |
509 | } |
510 | |
511 | void VPBasicBlock::dropAllReferences(VPValue *NewValue) { |
512 | for (VPRecipeBase &R : Recipes) { |
513 | for (auto *Def : R.definedValues()) |
514 | Def->replaceAllUsesWith(New: NewValue); |
515 | |
516 | for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) |
517 | R.setOperand(I, New: NewValue); |
518 | } |
519 | } |
520 | |
521 | VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) { |
522 | assert((SplitAt == end() || SplitAt->getParent() == this) && |
523 | "can only split at a position in the same block" ); |
524 | |
525 | SmallVector<VPBlockBase *, 2> Succs(successors()); |
526 | // First, disconnect the current block from its successors. |
527 | for (VPBlockBase *Succ : Succs) |
528 | VPBlockUtils::disconnectBlocks(From: this, To: Succ); |
529 | |
530 | // Create new empty block after the block to split. |
531 | auto *SplitBlock = new VPBasicBlock(getName() + ".split" ); |
532 | VPBlockUtils::insertBlockAfter(NewBlock: SplitBlock, BlockPtr: this); |
533 | |
534 | // Add successors for block to split to new block. |
535 | for (VPBlockBase *Succ : Succs) |
536 | VPBlockUtils::connectBlocks(From: SplitBlock, To: Succ); |
537 | |
538 | // Finally, move the recipes starting at SplitAt to new block. |
539 | for (VPRecipeBase &ToMove : |
540 | make_early_inc_range(Range: make_range(x: SplitAt, y: this->end()))) |
541 | ToMove.moveBefore(BB&: *SplitBlock, I: SplitBlock->end()); |
542 | |
543 | return SplitBlock; |
544 | } |
545 | |
546 | VPRegionBlock *VPBasicBlock::getEnclosingLoopRegion() { |
547 | VPRegionBlock *P = getParent(); |
548 | if (P && P->isReplicator()) { |
549 | P = P->getParent(); |
550 | assert(!cast<VPRegionBlock>(P)->isReplicator() && |
551 | "unexpected nested replicate regions" ); |
552 | } |
553 | return P; |
554 | } |
555 | |
556 | static bool hasConditionalTerminator(const VPBasicBlock *VPBB) { |
557 | if (VPBB->empty()) { |
558 | assert( |
559 | VPBB->getNumSuccessors() < 2 && |
560 | "block with multiple successors doesn't have a recipe as terminator" ); |
561 | return false; |
562 | } |
563 | |
564 | const VPRecipeBase *R = &VPBB->back(); |
565 | bool IsCondBranch = isa<VPBranchOnMaskRecipe>(Val: R) || |
566 | match(V: R, P: m_BranchOnCond(Op0: m_VPValue())) || |
567 | match(V: R, P: m_BranchOnCount(Op0: m_VPValue(), Op1: m_VPValue())); |
568 | (void)IsCondBranch; |
569 | |
570 | if (VPBB->getNumSuccessors() >= 2 || |
571 | (VPBB->isExiting() && !VPBB->getParent()->isReplicator())) { |
572 | assert(IsCondBranch && "block with multiple successors not terminated by " |
573 | "conditional branch recipe" ); |
574 | |
575 | return true; |
576 | } |
577 | |
578 | assert( |
579 | !IsCondBranch && |
580 | "block with 0 or 1 successors terminated by conditional branch recipe" ); |
581 | return false; |
582 | } |
583 | |
584 | VPRecipeBase *VPBasicBlock::getTerminator() { |
585 | if (hasConditionalTerminator(VPBB: this)) |
586 | return &back(); |
587 | return nullptr; |
588 | } |
589 | |
590 | const VPRecipeBase *VPBasicBlock::getTerminator() const { |
591 | if (hasConditionalTerminator(VPBB: this)) |
592 | return &back(); |
593 | return nullptr; |
594 | } |
595 | |
596 | bool VPBasicBlock::isExiting() const { |
597 | return getParent() && getParent()->getExitingBasicBlock() == this; |
598 | } |
599 | |
600 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
601 | void VPBlockBase::printSuccessors(raw_ostream &O, const Twine &Indent) const { |
602 | if (getSuccessors().empty()) { |
603 | O << Indent << "No successors\n" ; |
604 | } else { |
605 | O << Indent << "Successor(s): " ; |
606 | ListSeparator LS; |
607 | for (auto *Succ : getSuccessors()) |
608 | O << LS << Succ->getName(); |
609 | O << '\n'; |
610 | } |
611 | } |
612 | |
613 | void VPBasicBlock::print(raw_ostream &O, const Twine &Indent, |
614 | VPSlotTracker &SlotTracker) const { |
615 | O << Indent << getName() << ":\n" ; |
616 | |
617 | auto RecipeIndent = Indent + " " ; |
618 | for (const VPRecipeBase &Recipe : *this) { |
619 | Recipe.print(O, Indent: RecipeIndent, SlotTracker); |
620 | O << '\n'; |
621 | } |
622 | |
623 | printSuccessors(O, Indent); |
624 | } |
625 | #endif |
626 | |
627 | static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry); |
628 | |
629 | // Clone the CFG for all nodes in the single-entry-single-exit region reachable |
630 | // from \p Entry, this includes cloning the blocks and their recipes. Operands |
631 | // of cloned recipes will NOT be updated. Remapping of operands must be done |
632 | // separately. Returns a pair with the the new entry and exiting blocks of the |
633 | // cloned region. |
634 | static std::pair<VPBlockBase *, VPBlockBase *> cloneSESE(VPBlockBase *Entry) { |
635 | DenseMap<VPBlockBase *, VPBlockBase *> Old2NewVPBlocks; |
636 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT( |
637 | Entry); |
638 | for (VPBlockBase *BB : RPOT) { |
639 | VPBlockBase *NewBB = BB->clone(); |
640 | for (VPBlockBase *Pred : BB->getPredecessors()) |
641 | VPBlockUtils::connectBlocks(From: Old2NewVPBlocks[Pred], To: NewBB); |
642 | |
643 | Old2NewVPBlocks[BB] = NewBB; |
644 | } |
645 | |
646 | #if !defined(NDEBUG) |
647 | // Verify that the order of predecessors and successors matches in the cloned |
648 | // version. |
649 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> |
650 | NewRPOT(Old2NewVPBlocks[Entry]); |
651 | for (const auto &[OldBB, NewBB] : zip(t&: RPOT, u&: NewRPOT)) { |
652 | for (const auto &[OldPred, NewPred] : |
653 | zip(t&: OldBB->getPredecessors(), u&: NewBB->getPredecessors())) |
654 | assert(NewPred == Old2NewVPBlocks[OldPred] && "Different predecessors" ); |
655 | |
656 | for (const auto &[OldSucc, NewSucc] : |
657 | zip(t: OldBB->successors(), u: NewBB->successors())) |
658 | assert(NewSucc == Old2NewVPBlocks[OldSucc] && "Different successors" ); |
659 | } |
660 | #endif |
661 | |
662 | return std::make_pair(x&: Old2NewVPBlocks[Entry], |
663 | y&: Old2NewVPBlocks[*reverse(C&: RPOT).begin()]); |
664 | } |
665 | |
666 | VPRegionBlock *VPRegionBlock::clone() { |
667 | const auto &[NewEntry, NewExiting] = cloneSESE(Entry: getEntry()); |
668 | auto *NewRegion = |
669 | new VPRegionBlock(NewEntry, NewExiting, getName(), isReplicator()); |
670 | for (VPBlockBase *Block : vp_depth_first_shallow(G: NewEntry)) |
671 | Block->setParent(NewRegion); |
672 | return NewRegion; |
673 | } |
674 | |
675 | void VPRegionBlock::dropAllReferences(VPValue *NewValue) { |
676 | for (VPBlockBase *Block : vp_depth_first_shallow(G: Entry)) |
677 | // Drop all references in VPBasicBlocks and replace all uses with |
678 | // DummyValue. |
679 | Block->dropAllReferences(NewValue); |
680 | } |
681 | |
682 | void VPRegionBlock::execute(VPTransformState *State) { |
683 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> |
684 | RPOT(Entry); |
685 | |
686 | if (!isReplicator()) { |
687 | // Create and register the new vector loop. |
688 | Loop *PrevLoop = State->CurrentVectorLoop; |
689 | State->CurrentVectorLoop = State->LI->AllocateLoop(); |
690 | BasicBlock *VectorPH = State->CFG.VPBB2IRBB[getPreheaderVPBB()]; |
691 | Loop *ParentLoop = State->LI->getLoopFor(BB: VectorPH); |
692 | |
693 | // Insert the new loop into the loop nest and register the new basic blocks |
694 | // before calling any utilities such as SCEV that require valid LoopInfo. |
695 | if (ParentLoop) |
696 | ParentLoop->addChildLoop(NewChild: State->CurrentVectorLoop); |
697 | else |
698 | State->LI->addTopLevelLoop(New: State->CurrentVectorLoop); |
699 | |
700 | // Visit the VPBlocks connected to "this", starting from it. |
701 | for (VPBlockBase *Block : RPOT) { |
702 | LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
703 | Block->execute(State); |
704 | } |
705 | |
706 | State->CurrentVectorLoop = PrevLoop; |
707 | return; |
708 | } |
709 | |
710 | assert(!State->Instance && "Replicating a Region with non-null instance." ); |
711 | |
712 | // Enter replicating mode. |
713 | State->Instance = VPIteration(0, 0); |
714 | |
715 | for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) { |
716 | State->Instance->Part = Part; |
717 | assert(!State->VF.isScalable() && "VF is assumed to be non scalable." ); |
718 | for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF; |
719 | ++Lane) { |
720 | State->Instance->Lane = VPLane(Lane, VPLane::Kind::First); |
721 | // Visit the VPBlocks connected to \p this, starting from it. |
722 | for (VPBlockBase *Block : RPOT) { |
723 | LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n'); |
724 | Block->execute(State); |
725 | } |
726 | } |
727 | } |
728 | |
729 | // Exit replicating mode. |
730 | State->Instance.reset(); |
731 | } |
732 | |
733 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
734 | void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, |
735 | VPSlotTracker &SlotTracker) const { |
736 | O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> " ) << getName() << ": {" ; |
737 | auto NewIndent = Indent + " " ; |
738 | for (auto *BlockBase : vp_depth_first_shallow(G: Entry)) { |
739 | O << '\n'; |
740 | BlockBase->print(O, Indent: NewIndent, SlotTracker); |
741 | } |
742 | O << Indent << "}\n" ; |
743 | |
744 | printSuccessors(O, Indent); |
745 | } |
746 | #endif |
747 | |
748 | VPlan::~VPlan() { |
749 | for (auto &KV : LiveOuts) |
750 | delete KV.second; |
751 | LiveOuts.clear(); |
752 | |
753 | if (Entry) { |
754 | VPValue DummyValue; |
755 | for (VPBlockBase *Block : vp_depth_first_shallow(G: Entry)) |
756 | Block->dropAllReferences(NewValue: &DummyValue); |
757 | |
758 | VPBlockBase::deleteCFG(Entry); |
759 | |
760 | Preheader->dropAllReferences(NewValue: &DummyValue); |
761 | delete Preheader; |
762 | } |
763 | for (VPValue *VPV : VPLiveInsToFree) |
764 | delete VPV; |
765 | if (BackedgeTakenCount) |
766 | delete BackedgeTakenCount; |
767 | } |
768 | |
769 | VPlanPtr VPlan::createInitialVPlan(const SCEV *TripCount, ScalarEvolution &SE) { |
770 | VPBasicBlock * = new VPBasicBlock("ph" ); |
771 | VPBasicBlock * = new VPBasicBlock("vector.ph" ); |
772 | auto Plan = std::make_unique<VPlan>(args&: Preheader, args&: VecPreheader); |
773 | Plan->TripCount = |
774 | vputils::getOrCreateVPValueForSCEVExpr(Plan&: *Plan, Expr: TripCount, SE); |
775 | // Create empty VPRegionBlock, to be filled during processing later. |
776 | auto *TopRegion = new VPRegionBlock("vector loop" , false /*isReplicator*/); |
777 | VPBlockUtils::insertBlockAfter(NewBlock: TopRegion, BlockPtr: VecPreheader); |
778 | VPBasicBlock *MiddleVPBB = new VPBasicBlock("middle.block" ); |
779 | VPBlockUtils::insertBlockAfter(NewBlock: MiddleVPBB, BlockPtr: TopRegion); |
780 | return Plan; |
781 | } |
782 | |
783 | void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV, |
784 | Value *CanonicalIVStartValue, |
785 | VPTransformState &State) { |
786 | // Check if the backedge taken count is needed, and if so build it. |
787 | if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { |
788 | IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); |
789 | auto *TCMO = Builder.CreateSub(LHS: TripCountV, |
790 | RHS: ConstantInt::get(Ty: TripCountV->getType(), V: 1), |
791 | Name: "trip.count.minus.1" ); |
792 | BackedgeTakenCount->setUnderlyingValue(TCMO); |
793 | } |
794 | |
795 | VectorTripCount.setUnderlyingValue(VectorTripCountV); |
796 | |
797 | IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); |
798 | // FIXME: Model VF * UF computation completely in VPlan. |
799 | VFxUF.setUnderlyingValue( |
800 | createStepForVF(B&: Builder, Ty: TripCountV->getType(), VF: State.VF, Step: State.UF)); |
801 | |
802 | // When vectorizing the epilogue loop, the canonical induction start value |
803 | // needs to be changed from zero to the value after the main vector loop. |
804 | // FIXME: Improve modeling for canonical IV start values in the epilogue loop. |
805 | if (CanonicalIVStartValue) { |
806 | VPValue *VPV = getOrAddLiveIn(V: CanonicalIVStartValue); |
807 | auto *IV = getCanonicalIV(); |
808 | assert(all_of(IV->users(), |
809 | [](const VPUser *U) { |
810 | return isa<VPScalarIVStepsRecipe>(U) || |
811 | isa<VPScalarCastRecipe>(U) || |
812 | isa<VPDerivedIVRecipe>(U) || |
813 | cast<VPInstruction>(U)->getOpcode() == |
814 | Instruction::Add; |
815 | }) && |
816 | "the canonical IV should only be used by its increment or " |
817 | "ScalarIVSteps when resetting the start value" ); |
818 | IV->setOperand(I: 0, New: VPV); |
819 | } |
820 | } |
821 | |
822 | /// Generate the code inside the preheader and body of the vectorized loop. |
823 | /// Assumes a single pre-header basic-block was created for this. Introduce |
824 | /// additional basic-blocks as needed, and fill them all. |
825 | void VPlan::execute(VPTransformState *State) { |
826 | // Initialize CFG state. |
827 | State->CFG.PrevVPBB = nullptr; |
828 | State->CFG.ExitBB = State->CFG.PrevBB->getSingleSuccessor(); |
829 | BasicBlock * = State->CFG.PrevBB; |
830 | State->Builder.SetInsertPoint(VectorPreHeader->getTerminator()); |
831 | |
832 | // Generate code in the loop pre-header and body. |
833 | for (VPBlockBase *Block : vp_depth_first_shallow(G: Entry)) |
834 | Block->execute(State); |
835 | |
836 | VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock(); |
837 | BasicBlock *VectorLatchBB = State->CFG.VPBB2IRBB[LatchVPBB]; |
838 | |
839 | // Fix the latch value of canonical, reduction and first-order recurrences |
840 | // phis in the vector loop. |
841 | VPBasicBlock * = getVectorLoopRegion()->getEntryBasicBlock(); |
842 | for (VPRecipeBase &R : Header->phis()) { |
843 | // Skip phi-like recipes that generate their backedege values themselves. |
844 | if (isa<VPWidenPHIRecipe>(Val: &R)) |
845 | continue; |
846 | |
847 | if (isa<VPWidenPointerInductionRecipe>(Val: &R) || |
848 | isa<VPWidenIntOrFpInductionRecipe>(Val: &R)) { |
849 | PHINode *Phi = nullptr; |
850 | if (isa<VPWidenIntOrFpInductionRecipe>(Val: &R)) { |
851 | Phi = cast<PHINode>(Val: State->get(Def: R.getVPSingleValue(), Part: 0)); |
852 | } else { |
853 | auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(Val: &R); |
854 | assert(!WidenPhi->onlyScalarsGenerated(State->VF.isScalable()) && |
855 | "recipe generating only scalars should have been replaced" ); |
856 | auto *GEP = cast<GetElementPtrInst>(Val: State->get(Def: WidenPhi, Part: 0)); |
857 | Phi = cast<PHINode>(Val: GEP->getPointerOperand()); |
858 | } |
859 | |
860 | Phi->setIncomingBlock(i: 1, BB: VectorLatchBB); |
861 | |
862 | // Move the last step to the end of the latch block. This ensures |
863 | // consistent placement of all induction updates. |
864 | Instruction *Inc = cast<Instruction>(Val: Phi->getIncomingValue(i: 1)); |
865 | Inc->moveBefore(MovePos: VectorLatchBB->getTerminator()->getPrevNode()); |
866 | continue; |
867 | } |
868 | |
869 | auto *PhiR = cast<VPHeaderPHIRecipe>(Val: &R); |
870 | // For canonical IV, first-order recurrences and in-order reduction phis, |
871 | // only a single part is generated, which provides the last part from the |
872 | // previous iteration. For non-ordered reductions all UF parts are |
873 | // generated. |
874 | bool SinglePartNeeded = |
875 | isa<VPCanonicalIVPHIRecipe>(Val: PhiR) || |
876 | isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(Val: PhiR) || |
877 | (isa<VPReductionPHIRecipe>(Val: PhiR) && |
878 | cast<VPReductionPHIRecipe>(Val: PhiR)->isOrdered()); |
879 | bool NeedsScalar = |
880 | isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(Val: PhiR) || |
881 | (isa<VPReductionPHIRecipe>(Val: PhiR) && |
882 | cast<VPReductionPHIRecipe>(Val: PhiR)->isInLoop()); |
883 | unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF; |
884 | |
885 | for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { |
886 | Value *Phi = State->get(Def: PhiR, Part, NeedsScalar); |
887 | Value *Val = |
888 | State->get(Def: PhiR->getBackedgeValue(), |
889 | Part: SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar); |
890 | cast<PHINode>(Val: Phi)->addIncoming(V: Val, BB: VectorLatchBB); |
891 | } |
892 | } |
893 | |
894 | // We do not attempt to preserve DT for outer loop vectorization currently. |
895 | if (!EnableVPlanNativePath) { |
896 | BasicBlock * = State->CFG.VPBB2IRBB[Header]; |
897 | State->DT->addNewBlock(BB: VectorHeaderBB, DomBB: VectorPreHeader); |
898 | updateDominatorTree(DT: State->DT, LoopLatchBB: VectorHeaderBB, LoopPreHeaderBB: VectorLatchBB, |
899 | LoopExitBB: State->CFG.ExitBB); |
900 | } |
901 | } |
902 | |
903 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
904 | void VPlan::printLiveIns(raw_ostream &O) const { |
905 | VPSlotTracker SlotTracker(this); |
906 | |
907 | if (VFxUF.getNumUsers() > 0) { |
908 | O << "\nLive-in " ; |
909 | VFxUF.printAsOperand(OS&: O, Tracker&: SlotTracker); |
910 | O << " = VF * UF" ; |
911 | } |
912 | |
913 | if (VectorTripCount.getNumUsers() > 0) { |
914 | O << "\nLive-in " ; |
915 | VectorTripCount.printAsOperand(OS&: O, Tracker&: SlotTracker); |
916 | O << " = vector-trip-count" ; |
917 | } |
918 | |
919 | if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { |
920 | O << "\nLive-in " ; |
921 | BackedgeTakenCount->printAsOperand(OS&: O, Tracker&: SlotTracker); |
922 | O << " = backedge-taken count" ; |
923 | } |
924 | |
925 | O << "\n" ; |
926 | if (TripCount->isLiveIn()) |
927 | O << "Live-in " ; |
928 | TripCount->printAsOperand(OS&: O, Tracker&: SlotTracker); |
929 | O << " = original trip-count" ; |
930 | O << "\n" ; |
931 | } |
932 | |
933 | LLVM_DUMP_METHOD |
934 | void VPlan::print(raw_ostream &O) const { |
935 | VPSlotTracker SlotTracker(this); |
936 | |
937 | O << "VPlan '" << getName() << "' {" ; |
938 | |
939 | printLiveIns(O); |
940 | |
941 | if (!getPreheader()->empty()) { |
942 | O << "\n" ; |
943 | getPreheader()->print(O, Indent: "" , SlotTracker); |
944 | } |
945 | |
946 | for (const VPBlockBase *Block : vp_depth_first_shallow(G: getEntry())) { |
947 | O << '\n'; |
948 | Block->print(O, Indent: "" , SlotTracker); |
949 | } |
950 | |
951 | if (!LiveOuts.empty()) |
952 | O << "\n" ; |
953 | for (const auto &KV : LiveOuts) { |
954 | KV.second->print(O, SlotTracker); |
955 | } |
956 | |
957 | O << "}\n" ; |
958 | } |
959 | |
960 | std::string VPlan::getName() const { |
961 | std::string Out; |
962 | raw_string_ostream RSO(Out); |
963 | RSO << Name << " for " ; |
964 | if (!VFs.empty()) { |
965 | RSO << "VF={" << VFs[0]; |
966 | for (ElementCount VF : drop_begin(RangeOrContainer: VFs)) |
967 | RSO << "," << VF; |
968 | RSO << "}," ; |
969 | } |
970 | |
971 | if (UFs.empty()) { |
972 | RSO << "UF>=1" ; |
973 | } else { |
974 | RSO << "UF={" << UFs[0]; |
975 | for (unsigned UF : drop_begin(RangeOrContainer: UFs)) |
976 | RSO << "," << UF; |
977 | RSO << "}" ; |
978 | } |
979 | |
980 | return Out; |
981 | } |
982 | |
983 | LLVM_DUMP_METHOD |
984 | void VPlan::printDOT(raw_ostream &O) const { |
985 | VPlanPrinter Printer(O, *this); |
986 | Printer.dump(); |
987 | } |
988 | |
989 | LLVM_DUMP_METHOD |
990 | void VPlan::dump() const { print(O&: dbgs()); } |
991 | #endif |
992 | |
993 | void VPlan::addLiveOut(PHINode *PN, VPValue *V) { |
994 | assert(LiveOuts.count(PN) == 0 && "an exit value for PN already exists" ); |
995 | LiveOuts.insert(KV: {PN, new VPLiveOut(PN, V)}); |
996 | } |
997 | |
998 | void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *, |
999 | BasicBlock *LoopLatchBB, |
1000 | BasicBlock *LoopExitBB) { |
1001 | // The vector body may be more than a single basic-block by this point. |
1002 | // Update the dominator tree information inside the vector body by propagating |
1003 | // it from header to latch, expecting only triangular control-flow, if any. |
1004 | BasicBlock *PostDomSucc = nullptr; |
1005 | for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) { |
1006 | // Get the list of successors of this block. |
1007 | std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB)); |
1008 | assert(Succs.size() <= 2 && |
1009 | "Basic block in vector loop has more than 2 successors." ); |
1010 | PostDomSucc = Succs[0]; |
1011 | if (Succs.size() == 1) { |
1012 | assert(PostDomSucc->getSinglePredecessor() && |
1013 | "PostDom successor has more than one predecessor." ); |
1014 | DT->addNewBlock(BB: PostDomSucc, DomBB: BB); |
1015 | continue; |
1016 | } |
1017 | BasicBlock *InterimSucc = Succs[1]; |
1018 | if (PostDomSucc->getSingleSuccessor() == InterimSucc) { |
1019 | PostDomSucc = Succs[1]; |
1020 | InterimSucc = Succs[0]; |
1021 | } |
1022 | assert(InterimSucc->getSingleSuccessor() == PostDomSucc && |
1023 | "One successor of a basic block does not lead to the other." ); |
1024 | assert(InterimSucc->getSinglePredecessor() && |
1025 | "Interim successor has more than one predecessor." ); |
1026 | assert(PostDomSucc->hasNPredecessors(2) && |
1027 | "PostDom successor has more than two predecessors." ); |
1028 | DT->addNewBlock(BB: InterimSucc, DomBB: BB); |
1029 | DT->addNewBlock(BB: PostDomSucc, DomBB: BB); |
1030 | } |
1031 | // Latch block is a new dominator for the loop exit. |
1032 | DT->changeImmediateDominator(BB: LoopExitBB, NewBB: LoopLatchBB); |
1033 | assert(DT->verify(DominatorTree::VerificationLevel::Fast)); |
1034 | } |
1035 | |
1036 | static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, |
1037 | DenseMap<VPValue *, VPValue *> &Old2NewVPValues) { |
1038 | // Update the operands of all cloned recipes starting at NewEntry. This |
1039 | // traverses all reachable blocks. This is done in two steps, to handle cycles |
1040 | // in PHI recipes. |
1041 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> |
1042 | OldDeepRPOT(Entry); |
1043 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> |
1044 | NewDeepRPOT(NewEntry); |
1045 | // First, collect all mappings from old to new VPValues defined by cloned |
1046 | // recipes. |
1047 | for (const auto &[OldBB, NewBB] : |
1048 | zip(t: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: OldDeepRPOT), |
1049 | u: VPBlockUtils::blocksOnly<VPBasicBlock>(Range: NewDeepRPOT))) { |
1050 | assert(OldBB->getRecipeList().size() == NewBB->getRecipeList().size() && |
1051 | "blocks must have the same number of recipes" ); |
1052 | for (const auto &[OldR, NewR] : zip(t&: *OldBB, u&: *NewBB)) { |
1053 | assert(OldR.getNumOperands() == NewR.getNumOperands() && |
1054 | "recipes must have the same number of operands" ); |
1055 | assert(OldR.getNumDefinedValues() == NewR.getNumDefinedValues() && |
1056 | "recipes must define the same number of operands" ); |
1057 | for (const auto &[OldV, NewV] : |
1058 | zip(t: OldR.definedValues(), u: NewR.definedValues())) |
1059 | Old2NewVPValues[OldV] = NewV; |
1060 | } |
1061 | } |
1062 | |
1063 | // Update all operands to use cloned VPValues. |
1064 | for (VPBasicBlock *NewBB : |
1065 | VPBlockUtils::blocksOnly<VPBasicBlock>(Range: NewDeepRPOT)) { |
1066 | for (VPRecipeBase &NewR : *NewBB) |
1067 | for (unsigned I = 0, E = NewR.getNumOperands(); I != E; ++I) { |
1068 | VPValue *NewOp = Old2NewVPValues.lookup(Val: NewR.getOperand(N: I)); |
1069 | NewR.setOperand(I, New: NewOp); |
1070 | } |
1071 | } |
1072 | } |
1073 | |
1074 | VPlan *VPlan::duplicate() { |
1075 | // Clone blocks. |
1076 | VPBasicBlock * = Preheader->clone(); |
1077 | const auto &[NewEntry, __] = cloneSESE(Entry); |
1078 | |
1079 | // Create VPlan, clone live-ins and remap operands in the cloned blocks. |
1080 | auto *NewPlan = new VPlan(NewPreheader, cast<VPBasicBlock>(Val: NewEntry)); |
1081 | DenseMap<VPValue *, VPValue *> Old2NewVPValues; |
1082 | for (VPValue *OldLiveIn : VPLiveInsToFree) { |
1083 | Old2NewVPValues[OldLiveIn] = |
1084 | NewPlan->getOrAddLiveIn(V: OldLiveIn->getLiveInIRValue()); |
1085 | } |
1086 | Old2NewVPValues[&VectorTripCount] = &NewPlan->VectorTripCount; |
1087 | Old2NewVPValues[&VFxUF] = &NewPlan->VFxUF; |
1088 | if (BackedgeTakenCount) { |
1089 | NewPlan->BackedgeTakenCount = new VPValue(); |
1090 | Old2NewVPValues[BackedgeTakenCount] = NewPlan->BackedgeTakenCount; |
1091 | } |
1092 | assert(TripCount && "trip count must be set" ); |
1093 | if (TripCount->isLiveIn()) |
1094 | Old2NewVPValues[TripCount] = |
1095 | NewPlan->getOrAddLiveIn(V: TripCount->getLiveInIRValue()); |
1096 | // else NewTripCount will be created and inserted into Old2NewVPValues when |
1097 | // TripCount is cloned. In any case NewPlan->TripCount is updated below. |
1098 | |
1099 | remapOperands(Entry: Preheader, NewEntry: NewPreheader, Old2NewVPValues); |
1100 | remapOperands(Entry, NewEntry, Old2NewVPValues); |
1101 | |
1102 | // Clone live-outs. |
1103 | for (const auto &[_, LO] : LiveOuts) |
1104 | NewPlan->addLiveOut(PN: LO->getPhi(), V: Old2NewVPValues[LO->getOperand(N: 0)]); |
1105 | |
1106 | // Initialize remaining fields of cloned VPlan. |
1107 | NewPlan->VFs = VFs; |
1108 | NewPlan->UFs = UFs; |
1109 | // TODO: Adjust names. |
1110 | NewPlan->Name = Name; |
1111 | assert(Old2NewVPValues.contains(TripCount) && |
1112 | "TripCount must have been added to Old2NewVPValues" ); |
1113 | NewPlan->TripCount = Old2NewVPValues[TripCount]; |
1114 | return NewPlan; |
1115 | } |
1116 | |
1117 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1118 | |
1119 | Twine VPlanPrinter::getUID(const VPBlockBase *Block) { |
1120 | return (isa<VPRegionBlock>(Val: Block) ? "cluster_N" : "N" ) + |
1121 | Twine(getOrCreateBID(Block)); |
1122 | } |
1123 | |
1124 | Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) { |
1125 | const std::string &Name = Block->getName(); |
1126 | if (!Name.empty()) |
1127 | return Name; |
1128 | return "VPB" + Twine(getOrCreateBID(Block)); |
1129 | } |
1130 | |
1131 | void VPlanPrinter::dump() { |
1132 | Depth = 1; |
1133 | bumpIndent(b: 0); |
1134 | OS << "digraph VPlan {\n" ; |
1135 | OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan" ; |
1136 | if (!Plan.getName().empty()) |
1137 | OS << "\\n" << DOT::EscapeString(Label: Plan.getName()); |
1138 | |
1139 | { |
1140 | // Print live-ins. |
1141 | std::string Str; |
1142 | raw_string_ostream SS(Str); |
1143 | Plan.printLiveIns(O&: SS); |
1144 | SmallVector<StringRef, 0> Lines; |
1145 | StringRef(Str).rtrim(Char: '\n').split(A&: Lines, Separator: "\n" ); |
1146 | for (auto Line : Lines) |
1147 | OS << DOT::EscapeString(Label: Line.str()) << "\\n" ; |
1148 | } |
1149 | |
1150 | OS << "\"]\n" ; |
1151 | OS << "node [shape=rect, fontname=Courier, fontsize=30]\n" ; |
1152 | OS << "edge [fontname=Courier, fontsize=30]\n" ; |
1153 | OS << "compound=true\n" ; |
1154 | |
1155 | dumpBlock(Block: Plan.getPreheader()); |
1156 | |
1157 | for (const VPBlockBase *Block : vp_depth_first_shallow(G: Plan.getEntry())) |
1158 | dumpBlock(Block); |
1159 | |
1160 | OS << "}\n" ; |
1161 | } |
1162 | |
1163 | void VPlanPrinter::dumpBlock(const VPBlockBase *Block) { |
1164 | if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Val: Block)) |
1165 | dumpBasicBlock(BasicBlock); |
1166 | else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
1167 | dumpRegion(Region); |
1168 | else |
1169 | llvm_unreachable("Unsupported kind of VPBlock." ); |
1170 | } |
1171 | |
1172 | void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To, |
1173 | bool Hidden, const Twine &Label) { |
1174 | // Due to "dot" we print an edge between two regions as an edge between the |
1175 | // exiting basic block and the entry basic of the respective regions. |
1176 | const VPBlockBase *Tail = From->getExitingBasicBlock(); |
1177 | const VPBlockBase *Head = To->getEntryBasicBlock(); |
1178 | OS << Indent << getUID(Block: Tail) << " -> " << getUID(Block: Head); |
1179 | OS << " [ label=\"" << Label << '\"'; |
1180 | if (Tail != From) |
1181 | OS << " ltail=" << getUID(Block: From); |
1182 | if (Head != To) |
1183 | OS << " lhead=" << getUID(Block: To); |
1184 | if (Hidden) |
1185 | OS << "; splines=none" ; |
1186 | OS << "]\n" ; |
1187 | } |
1188 | |
1189 | void VPlanPrinter::dumpEdges(const VPBlockBase *Block) { |
1190 | auto &Successors = Block->getSuccessors(); |
1191 | if (Successors.size() == 1) |
1192 | drawEdge(From: Block, To: Successors.front(), Hidden: false, Label: "" ); |
1193 | else if (Successors.size() == 2) { |
1194 | drawEdge(From: Block, To: Successors.front(), Hidden: false, Label: "T" ); |
1195 | drawEdge(From: Block, To: Successors.back(), Hidden: false, Label: "F" ); |
1196 | } else { |
1197 | unsigned SuccessorNumber = 0; |
1198 | for (auto *Successor : Successors) |
1199 | drawEdge(From: Block, To: Successor, Hidden: false, Label: Twine(SuccessorNumber++)); |
1200 | } |
1201 | } |
1202 | |
1203 | void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) { |
1204 | // Implement dot-formatted dump by performing plain-text dump into the |
1205 | // temporary storage followed by some post-processing. |
1206 | OS << Indent << getUID(Block: BasicBlock) << " [label =\n" ; |
1207 | bumpIndent(b: 1); |
1208 | std::string Str; |
1209 | raw_string_ostream SS(Str); |
1210 | // Use no indentation as we need to wrap the lines into quotes ourselves. |
1211 | BasicBlock->print(O&: SS, Indent: "" , SlotTracker); |
1212 | |
1213 | // We need to process each line of the output separately, so split |
1214 | // single-string plain-text dump. |
1215 | SmallVector<StringRef, 0> Lines; |
1216 | StringRef(Str).rtrim(Char: '\n').split(A&: Lines, Separator: "\n" ); |
1217 | |
1218 | auto EmitLine = [&](StringRef Line, StringRef Suffix) { |
1219 | OS << Indent << '"' << DOT::EscapeString(Label: Line.str()) << "\\l\"" << Suffix; |
1220 | }; |
1221 | |
1222 | // Don't need the "+" after the last line. |
1223 | for (auto Line : make_range(x: Lines.begin(), y: Lines.end() - 1)) |
1224 | EmitLine(Line, " +\n" ); |
1225 | EmitLine(Lines.back(), "\n" ); |
1226 | |
1227 | bumpIndent(b: -1); |
1228 | OS << Indent << "]\n" ; |
1229 | |
1230 | dumpEdges(Block: BasicBlock); |
1231 | } |
1232 | |
1233 | void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { |
1234 | OS << Indent << "subgraph " << getUID(Block: Region) << " {\n" ; |
1235 | bumpIndent(b: 1); |
1236 | OS << Indent << "fontname=Courier\n" |
1237 | << Indent << "label=\"" |
1238 | << DOT::EscapeString(Label: Region->isReplicator() ? "<xVFxUF> " : "<x1> " ) |
1239 | << DOT::EscapeString(Label: Region->getName()) << "\"\n" ; |
1240 | // Dump the blocks of the region. |
1241 | assert(Region->getEntry() && "Region contains no inner blocks." ); |
1242 | for (const VPBlockBase *Block : vp_depth_first_shallow(G: Region->getEntry())) |
1243 | dumpBlock(Block); |
1244 | bumpIndent(b: -1); |
1245 | OS << Indent << "}\n" ; |
1246 | dumpEdges(Block: Region); |
1247 | } |
1248 | |
1249 | void VPlanIngredient::print(raw_ostream &O) const { |
1250 | if (auto *Inst = dyn_cast<Instruction>(Val: V)) { |
1251 | if (!Inst->getType()->isVoidTy()) { |
1252 | Inst->printAsOperand(O, PrintType: false); |
1253 | O << " = " ; |
1254 | } |
1255 | O << Inst->getOpcodeName() << " " ; |
1256 | unsigned E = Inst->getNumOperands(); |
1257 | if (E > 0) { |
1258 | Inst->getOperand(i: 0)->printAsOperand(O, PrintType: false); |
1259 | for (unsigned I = 1; I < E; ++I) |
1260 | Inst->getOperand(i: I)->printAsOperand(O&: O << ", " , PrintType: false); |
1261 | } |
1262 | } else // !Inst |
1263 | V->printAsOperand(O, PrintType: false); |
1264 | } |
1265 | |
1266 | #endif |
1267 | |
1268 | template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT); |
1269 | |
1270 | void VPValue::replaceAllUsesWith(VPValue *New) { |
1271 | replaceUsesWithIf(New, ShouldReplace: [](VPUser &, unsigned) { return true; }); |
1272 | } |
1273 | |
1274 | void VPValue::replaceUsesWithIf( |
1275 | VPValue *New, |
1276 | llvm::function_ref<bool(VPUser &U, unsigned Idx)> ShouldReplace) { |
1277 | // Note that this early exit is required for correctness; the implementation |
1278 | // below relies on the number of users for this VPValue to decrease, which |
1279 | // isn't the case if this == New. |
1280 | if (this == New) |
1281 | return; |
1282 | |
1283 | for (unsigned J = 0; J < getNumUsers();) { |
1284 | VPUser *User = Users[J]; |
1285 | bool RemovedUser = false; |
1286 | for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I) { |
1287 | if (User->getOperand(N: I) != this || !ShouldReplace(*User, I)) |
1288 | continue; |
1289 | |
1290 | RemovedUser = true; |
1291 | User->setOperand(I, New); |
1292 | } |
1293 | // If a user got removed after updating the current user, the next user to |
1294 | // update will be moved to the current position, so we only need to |
1295 | // increment the index if the number of users did not change. |
1296 | if (!RemovedUser) |
1297 | J++; |
1298 | } |
1299 | } |
1300 | |
1301 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
1302 | void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const { |
1303 | OS << Tracker.getOrCreateName(V: this); |
1304 | } |
1305 | |
1306 | void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const { |
1307 | interleaveComma(c: operands(), os&: O, each_fn: [&O, &SlotTracker](VPValue *Op) { |
1308 | Op->printAsOperand(OS&: O, Tracker&: SlotTracker); |
1309 | }); |
1310 | } |
1311 | #endif |
1312 | |
1313 | void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region, |
1314 | Old2NewTy &Old2New, |
1315 | InterleavedAccessInfo &IAI) { |
1316 | ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> |
1317 | RPOT(Region->getEntry()); |
1318 | for (VPBlockBase *Base : RPOT) { |
1319 | visitBlock(Block: Base, Old2New, IAI); |
1320 | } |
1321 | } |
1322 | |
1323 | void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New, |
1324 | InterleavedAccessInfo &IAI) { |
1325 | if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Val: Block)) { |
1326 | for (VPRecipeBase &VPI : *VPBB) { |
1327 | if (isa<VPWidenPHIRecipe>(Val: &VPI)) |
1328 | continue; |
1329 | assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions" ); |
1330 | auto *VPInst = cast<VPInstruction>(Val: &VPI); |
1331 | |
1332 | auto *Inst = dyn_cast_or_null<Instruction>(Val: VPInst->getUnderlyingValue()); |
1333 | if (!Inst) |
1334 | continue; |
1335 | auto *IG = IAI.getInterleaveGroup(Instr: Inst); |
1336 | if (!IG) |
1337 | continue; |
1338 | |
1339 | auto NewIGIter = Old2New.find(Val: IG); |
1340 | if (NewIGIter == Old2New.end()) |
1341 | Old2New[IG] = new InterleaveGroup<VPInstruction>( |
1342 | IG->getFactor(), IG->isReverse(), IG->getAlign()); |
1343 | |
1344 | if (Inst == IG->getInsertPos()) |
1345 | Old2New[IG]->setInsertPos(VPInst); |
1346 | |
1347 | InterleaveGroupMap[VPInst] = Old2New[IG]; |
1348 | InterleaveGroupMap[VPInst]->insertMember( |
1349 | Instr: VPInst, Index: IG->getIndex(Instr: Inst), |
1350 | NewAlign: Align(IG->isReverse() ? (-1) * int(IG->getFactor()) |
1351 | : IG->getFactor())); |
1352 | } |
1353 | } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Val: Block)) |
1354 | visitRegion(Region, Old2New, IAI); |
1355 | else |
1356 | llvm_unreachable("Unsupported kind of VPBlock." ); |
1357 | } |
1358 | |
1359 | VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan, |
1360 | InterleavedAccessInfo &IAI) { |
1361 | Old2NewTy Old2New; |
1362 | visitRegion(Region: Plan.getVectorLoopRegion(), Old2New, IAI); |
1363 | } |
1364 | |
1365 | void VPSlotTracker::assignName(const VPValue *V) { |
1366 | assert(!VPValue2Name.contains(V) && "VPValue already has a name!" ); |
1367 | auto *UV = V->getUnderlyingValue(); |
1368 | if (!UV) { |
1369 | VPValue2Name[V] = (Twine("vp<%" ) + Twine(NextSlot) + ">" ).str(); |
1370 | NextSlot++; |
1371 | return; |
1372 | } |
1373 | |
1374 | // Use the name of the underlying Value, wrapped in "ir<>", and versioned by |
1375 | // appending ".Number" to the name if there are multiple uses. |
1376 | std::string Name; |
1377 | raw_string_ostream S(Name); |
1378 | UV->printAsOperand(O&: S, PrintType: false); |
1379 | assert(!Name.empty() && "Name cannot be empty." ); |
1380 | std::string BaseName = (Twine("ir<" ) + Name + Twine(">" )).str(); |
1381 | |
1382 | // First assign the base name for V. |
1383 | const auto &[A, _] = VPValue2Name.insert(KV: {V, BaseName}); |
1384 | // Integer or FP constants with different types will result in he same string |
1385 | // due to stripping types. |
1386 | if (V->isLiveIn() && isa<ConstantInt, ConstantFP>(Val: UV)) |
1387 | return; |
1388 | |
1389 | // If it is already used by C > 0 other VPValues, increase the version counter |
1390 | // C and use it for V. |
1391 | const auto &[C, UseInserted] = BaseName2Version.insert(KV: {BaseName, 0}); |
1392 | if (!UseInserted) { |
1393 | C->second++; |
1394 | A->second = (BaseName + Twine("." ) + Twine(C->second)).str(); |
1395 | } |
1396 | } |
1397 | |
1398 | void VPSlotTracker::assignNames(const VPlan &Plan) { |
1399 | if (Plan.VFxUF.getNumUsers() > 0) |
1400 | assignName(V: &Plan.VFxUF); |
1401 | assignName(V: &Plan.VectorTripCount); |
1402 | if (Plan.BackedgeTakenCount) |
1403 | assignName(V: Plan.BackedgeTakenCount); |
1404 | for (VPValue *LI : Plan.VPLiveInsToFree) |
1405 | assignName(V: LI); |
1406 | assignNames(VPBB: Plan.getPreheader()); |
1407 | |
1408 | ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> |
1409 | RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); |
1410 | for (const VPBasicBlock *VPBB : |
1411 | VPBlockUtils::blocksOnly<const VPBasicBlock>(Range: RPOT)) |
1412 | assignNames(VPBB); |
1413 | } |
1414 | |
1415 | void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { |
1416 | for (const VPRecipeBase &Recipe : *VPBB) |
1417 | for (VPValue *Def : Recipe.definedValues()) |
1418 | assignName(V: Def); |
1419 | } |
1420 | |
1421 | std::string VPSlotTracker::getOrCreateName(const VPValue *V) const { |
1422 | std::string Name = VPValue2Name.lookup(Val: V); |
1423 | if (!Name.empty()) |
1424 | return Name; |
1425 | |
1426 | // If no name was assigned, no VPlan was provided when creating the slot |
1427 | // tracker or it is not reachable from the provided VPlan. This can happen, |
1428 | // e.g. when trying to print a recipe that has not been inserted into a VPlan |
1429 | // in a debugger. |
1430 | // TODO: Update VPSlotTracker constructor to assign names to recipes & |
1431 | // VPValues not associated with a VPlan, instead of constructing names ad-hoc |
1432 | // here. |
1433 | const VPRecipeBase *DefR = V->getDefiningRecipe(); |
1434 | (void)DefR; |
1435 | assert((!DefR || !DefR->getParent() || !DefR->getParent()->getPlan()) && |
1436 | "VPValue defined by a recipe in a VPlan?" ); |
1437 | |
1438 | // Use the underlying value's name, if there is one. |
1439 | if (auto *UV = V->getUnderlyingValue()) { |
1440 | std::string Name; |
1441 | raw_string_ostream S(Name); |
1442 | UV->printAsOperand(O&: S, PrintType: false); |
1443 | return (Twine("ir<" ) + Name + ">" ).str(); |
1444 | } |
1445 | |
1446 | return "<badref>" ; |
1447 | } |
1448 | |
1449 | bool vputils::onlyFirstLaneUsed(const VPValue *Def) { |
1450 | return all_of(Range: Def->users(), |
1451 | P: [Def](const VPUser *U) { return U->onlyFirstLaneUsed(Op: Def); }); |
1452 | } |
1453 | |
1454 | bool vputils::onlyFirstPartUsed(const VPValue *Def) { |
1455 | return all_of(Range: Def->users(), |
1456 | P: [Def](const VPUser *U) { return U->onlyFirstPartUsed(Op: Def); }); |
1457 | } |
1458 | |
1459 | VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, |
1460 | ScalarEvolution &SE) { |
1461 | if (auto *Expanded = Plan.getSCEVExpansion(S: Expr)) |
1462 | return Expanded; |
1463 | VPValue *Expanded = nullptr; |
1464 | if (auto *E = dyn_cast<SCEVConstant>(Val: Expr)) |
1465 | Expanded = Plan.getOrAddLiveIn(V: E->getValue()); |
1466 | else if (auto *E = dyn_cast<SCEVUnknown>(Val: Expr)) |
1467 | Expanded = Plan.getOrAddLiveIn(V: E->getValue()); |
1468 | else { |
1469 | Expanded = new VPExpandSCEVRecipe(Expr, SE); |
1470 | Plan.getPreheader()->appendRecipe(Recipe: Expanded->getDefiningRecipe()); |
1471 | } |
1472 | Plan.addSCEVExpansion(S: Expr, V: Expanded); |
1473 | return Expanded; |
1474 | } |
1475 | |