1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relevant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
46
47#include "llvm/ADT/GenericUniformityInfo.h"
48
49#include "llvm/ADT/STLExtras.h"
50#include "llvm/ADT/SmallPtrSet.h"
51#include "llvm/ADT/SparseBitVector.h"
52#include "llvm/ADT/StringExtras.h"
53#include "llvm/Support/raw_ostream.h"
54
55#include <set>
56
57#define DEBUG_TYPE "uniformity"
58
59namespace llvm {
60
61/// Construct a specially modified post-order traversal of cycles.
62///
63/// The ModifiedPO is constructed using a virtually modified CFG as follows:
64///
65/// 1. The successors of pre-entry nodes (predecessors of a cycle
66/// entry that are outside the cycle) are replaced by the
67/// successors of the successors of the header.
68/// 2. Successors of the cycle header are replaced by the exit blocks
69/// of the cycle.
70///
71/// Effectively, we produce a depth-first numbering with the following
72/// properties:
73///
74/// 1. Nodes after a cycle are numbered earlier than the cycle header.
75/// 2. The header is numbered earlier than the nodes in the cycle.
76/// 3. The numbering of the nodes within the cycle forms an interval
77/// starting with the header.
78///
79/// Effectively, the virtual modification arranges the nodes in a
80/// cycle as a DAG with the header as the sole leaf, and successors of
81/// the header as the roots. A reverse traversal of this numbering has
82/// the following invariant on the unmodified original CFG:
83///
84/// Each node is visited after all its predecessors, except if that
85/// predecessor is the cycle header.
86///
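/// As an illustrative sketch (the block names here are hypothetical, not
/// taken from the implementation): for the reducible loop
///
///     A -> H;  H -> B;  B -> H, X;  X -> R
///
/// with header H, latch B and sole exit X, the modified post-order is
/// expected to be
///
///     R(0), X(1), H(2), B(3), A(4)
///
/// i.e. the blocks after the cycle (R, X) are numbered before the header H,
/// and the cycle blocks {H, B} form the interval [2, 3] starting at H.
///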
87template <typename ContextT> class ModifiedPostOrder {
88public:
89 using BlockT = typename ContextT::BlockT;
90 using FunctionT = typename ContextT::FunctionT;
91 using DominatorTreeT = typename ContextT::DominatorTreeT;
92
93 using CycleInfoT = GenericCycleInfo<ContextT>;
94 using CycleT = typename CycleInfoT::CycleT;
95 using const_iterator = typename std::vector<BlockT *>::const_iterator;
96
97 ModifiedPostOrder(const ContextT &C) : Context(C) {}
98
99 bool empty() const { return m_order.empty(); }
100 size_t size() const { return m_order.size(); }
101
102 void clear() { m_order.clear(); }
103 void compute(const CycleInfoT &CI);
104
105 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
106 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
107
108 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
109 POIndex[&BB] = m_order.size();
110 m_order.push_back(&BB);
111 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
112 << "): " << Context.print(&BB) << "\n");
113 if (isReducibleCycleHeader)
114 ReducibleCycleHeaders.insert(&BB);
115 }
116
117 unsigned getIndex(const BlockT *BB) const {
118 assert(POIndex.count(BB));
119 return POIndex.lookup(BB);
120 }
121
122 bool isReducibleCycleHeader(const BlockT *BB) const {
123 return ReducibleCycleHeaders.contains(BB);
124 }
125
126private:
127 SmallVector<const BlockT *> m_order;
128 DenseMap<const BlockT *, unsigned> POIndex;
129 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
130 const ContextT &Context;
131
132 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
133 SmallPtrSetImpl<const BlockT *> &Finalized);
134
135 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
136 const CycleInfoT &CI, const CycleT *Cycle,
137 SmallPtrSetImpl<const BlockT *> &Finalized);
138};
139
140template <typename> class DivergencePropagator;
141
142/// \class GenericSyncDependenceAnalysis
143///
144/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
145///
146/// An analysis per divergent branch that returns the set of basic
147/// blocks whose phi nodes become divergent due to divergent control.
148/// These are the blocks that are reachable by two disjoint paths from
149/// the branch, or cycle exits reachable along a path that is disjoint
150/// from a path to the cycle latch.
151
152// Note: the blank line above intentionally ends the doxygen block; what follows is a plain implementation comment.
153//
154// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
155//
156// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
157// control-induced divergence in phi nodes.
158//
159// -- Reference --
160// The algorithm is an extension of Section 5 of
161//
162// An abstract interpretation for SPMD divergence
163// on reducible control flow graphs.
164// Julian Rosemann, Simon Moll and Sebastian Hack
165// POPL '21
166//
167//
168// -- Sync dependence --
169// Sync dependence characterizes the control flow aspect of the
170// propagation of branch divergence. For example,
171//
172// %cond = icmp slt i32 %tid, 10
173// br i1 %cond, label %then, label %else
174// then:
175// br label %merge
176// else:
177// br label %merge
178// merge:
179// %a = phi i32 [ 0, %then ], [ 1, %else ]
180//
181// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
182// because %tid is not on its use-def chains, %a is sync dependent on %tid
183// because the branch "br i1 %cond" depends on %tid and affects which value %a
184// is assigned to.
185//
186//
187// -- Reduction to SSA construction --
188// There are two disjoint paths from A to X, if a certain variant of SSA
189// construction places a phi node in X under the following set-up scheme.
190//
191// This variant of SSA construction ignores incoming undef values.
192// That is, paths from the entry without a definition do not result in
193// phi nodes.
194//
195// entry
196// / \
197// A \
198// / \ Y
199// B C /
200//       \ / \    /
201//        D    E
202//         \  /
203//           F
204//
205// Assume that A contains a divergent branch. We are interested
206// in the set of all blocks where each block is reachable from A
207// via two disjoint paths. This would be the set {D, F} in this
208// case.
209// To generally reduce this query to SSA construction we introduce
210// a virtual variable x and assign to x different values in each
211// successor block of A.
212//
213// entry
214// / \
215// A \
216// / \ Y
217// x = 0 x = 1 /
218// \ / \ /
219// D E
220// \ /
221// F
222//
223// Our flavor of SSA construction for x will construct the following
224//
225// entry
226// / \
227// A \
228// / \ Y
229// x0 = 0 x1 = 1 /
230// \ / \ /
231// x2 = phi E
232// \ /
233// x3 = phi
234//
235// The blocks D and F contain phi nodes and are thus each reachable
236// by two disjoint paths from A.
237//
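// As a purely illustrative sketch (hypothetical IR; the analysis never
// materializes the variable x, and the entry and Y blocks are omitted for
// brevity), the set-up above corresponds roughly to:
//
//   A:
//     br i1 %div.cond, label %B, label %C
//   B:                                            ; x0 = 0
//     br label %D
//   C:                                            ; x1 = 1
//     br i1 %uni.cond, label %D, label %E
//   D:
//     %x2 = phi i32 [ 0, %B ], [ 1, %C ]          ; join of disjoint paths
//     br label %F
//   E:
//     br label %F
//   F:
//     %x3 = phi i32 [ %x2, %D ], [ 1, %E ]        ; join of disjoint paths
//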
238// -- Remarks --
239// * In case of cycle exits we need to check for temporal divergence.
240// To this end, we check whether the definition of x differs between the
241// cycle exit and the cycle header (_after_ SSA construction).
242//
243// * In the presence of irreducible control flow, the fixed point is
244// reached only after multiple iterations. This is because labels
245// reaching the header of a cycle must be repropagated through the
246// cycle. This is true even in a reducible cycle, since the labels
247// may have been produced by a nested irreducible cycle.
248//
249// * Note that SyncDependenceAnalysis is not concerned with the points
250// of convergence in an irreducible cycle. Its only purpose is to
251// identify join blocks. The "diverged entry" criterion is
252// separately applied on join blocks to determine if an entire
253// irreducible cycle is assumed to be divergent.
254//
255// * Relevant related work:
256// A simple algorithm for global data flow analysis problems.
257// Matthew S. Hecht and Jeffrey D. Ullman.
258// SIAM Journal on Computing, 4(4):519–532, December 1975.
259//
260template <typename ContextT> class GenericSyncDependenceAnalysis {
261public:
262 using BlockT = typename ContextT::BlockT;
263 using DominatorTreeT = typename ContextT::DominatorTreeT;
264 using FunctionT = typename ContextT::FunctionT;
265 using ValueRefT = typename ContextT::ValueRefT;
266 using InstructionT = typename ContextT::InstructionT;
267
268 using CycleInfoT = GenericCycleInfo<ContextT>;
269 using CycleT = typename CycleInfoT::CycleT;
270
271 using ConstBlockSet = SmallPtrSet<const BlockT *, 4>;
272 using ModifiedPO = ModifiedPostOrder<ContextT>;
273
274 // * if BlockLabels[B] == C then C is the dominating definition at
275 // block B
276 // * if BlockLabels[B] == nullptr then we haven't seen B yet
277 // * if BlockLabels[B] == B then:
278 // - B is a join point of disjoint paths from X, or,
279 // - B is an immediate successor of X (initial value), or,
280 // - B is X
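  //
  // As an illustration (hypothetical propagation for the diamond CFG in the
  // file comment above, with block A playing the role of X, successors B and
  // C, and joins D and F), the expected end state is roughly:
  //   BlockLabels[B] == B     (immediate successor of A, initial value)
  //   BlockLabels[C] == C     (immediate successor of A, initial value)
  //   BlockLabels[D] == D     (join of disjoint paths from A)
  //   BlockLabels[E] == C     (the only label reaching E from A's successors)
  //   BlockLabels[F] == F     (join of disjoint paths from A)
  // and blocks never reached from A keep the nullptr label.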
281 using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>;
282
283 /// Information discovered by the sync dependence analysis for each
284 /// divergent branch.
285 struct DivergenceDescriptor {
286 // Join points of diverged paths.
287 ConstBlockSet JoinDivBlocks;
288 // Divergent cycle exits
289 ConstBlockSet CycleDivBlocks;
290 // Labels assigned to blocks on diverged paths.
291 BlockLabelMap BlockLabels;
292 };
293
294 using DivergencePropagatorT = DivergencePropagator<ContextT>;
295
296 GenericSyncDependenceAnalysis(const ContextT &Context,
297 const DominatorTreeT &DT, const CycleInfoT &CI);
298
299 /// \brief Computes divergent join points and cycle exits caused by branch
300 /// divergence in \p Term.
301 ///
302 /// This returns a pair of sets:
303 /// * The set of blocks which are reachable by disjoint paths from
304 /// \p Term.
305/// * The set also contains cycle exits if there are two disjoint paths:
306 /// one from \p Term to the cycle exit and another from \p Term to
307 /// the cycle header.
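  ///
  /// For example, for a divergent branch in block A of the diamond CFG shown
  /// in the file comment above, the returned descriptor would be expected to
  /// contain JoinDivBlocks = {D, F}, and CycleDivBlocks would be empty since
  /// no cycles are involved.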
308 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
309
310private:
311 static DivergenceDescriptor EmptyDivergenceDesc;
312
313 ModifiedPO CyclePO;
314
315 const DominatorTreeT &DT;
316 const CycleInfoT &CI;
317
318 DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>>
319 CachedControlDivDescs;
320};
321
322/// \brief Analysis that identifies uniform values in a data-parallel
323/// execution.
324///
325/// This analysis propagates divergence in a data-parallel context
326/// from sources of divergence to all users. It can be instantiated
327/// for an IR that provides a suitable SSAContext.
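///
/// A sketch of how a specializing translation unit (currently
/// UniformityAnalysis.cpp, per the note at the top of this file) might
/// instantiate these templates for LLVM IR; the exact set of explicit
/// instantiations is an assumption here and lives in that .cpp file:
///
/// \code
///   template class llvm::GenericUniformityAnalysisImpl<llvm::SSAContext>;
///   template class llvm::GenericUniformityInfo<llvm::SSAContext>;
/// \endcode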
328template <typename ContextT> class GenericUniformityAnalysisImpl {
329public:
330 using BlockT = typename ContextT::BlockT;
331 using FunctionT = typename ContextT::FunctionT;
332 using ValueRefT = typename ContextT::ValueRefT;
333 using ConstValueRefT = typename ContextT::ConstValueRefT;
334 using UseT = typename ContextT::UseT;
335 using InstructionT = typename ContextT::InstructionT;
336 using DominatorTreeT = typename ContextT::DominatorTreeT;
337
338 using CycleInfoT = GenericCycleInfo<ContextT>;
339 using CycleT = typename CycleInfoT::CycleT;
340
341 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
342 using DivergenceDescriptorT =
343 typename SyncDependenceAnalysisT::DivergenceDescriptor;
344 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
345
346 GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI,
347 const TargetTransformInfo *TTI)
348 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
349 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
350
351 void initialize();
352
353 const FunctionT &getFunction() const { return F; }
354
355 /// \brief Mark \p Instr as an instruction that is always uniform.
356 void addUniformOverride(const InstructionT &Instr);
357
358 /// \brief Examine \p I for divergent outputs and add to the worklist.
359 void markDivergent(const InstructionT &I);
360
361 /// \brief Mark \p DivVal as a divergent value.
362 /// \returns Whether the tracked divergence state of \p DivVal changed.
363 bool markDivergent(ConstValueRefT DivVal);
364
365 /// \brief Mark outputs of \p Instr as divergent.
366 /// \returns Whether the tracked divergence state of any output has changed.
367 bool markDefsDivergent(const InstructionT &Instr);
368
369 /// \brief Propagate divergence to all instructions in the region.
370 /// Divergence is seeded by calls to \p markDivergent.
371 void compute();
372
373 /// \brief Whether any value was marked or analyzed to be divergent.
374 bool hasDivergence() const { return !DivergentValues.empty(); }
375
376 /// \brief Whether \p Instr will always return a uniform value regardless of
377 /// its operands.
378 bool isAlwaysUniform(const InstructionT &Instr) const;
379
380 bool hasDivergentDefs(const InstructionT &I) const;
381
382 bool isDivergent(const InstructionT &I) const {
383 if (I.isTerminator()) {
384 return DivergentTermBlocks.contains(I.getParent());
385 }
386 return hasDivergentDefs(I);
387 };
388
389 /// \brief Whether \p Val is divergent at its definition.
390 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
391
392 bool isDivergentUse(const UseT &U) const;
393
394 bool hasDivergentTerminator(const BlockT &B) const {
395 return DivergentTermBlocks.contains(&B);
396 }
397
398 void print(raw_ostream &out) const;
399
400protected:
401 /// \brief Value/block pair representing a single phi input.
402 struct PhiInput {
403 ConstValueRefT value;
404 BlockT *predBlock;
405
406 PhiInput(ConstValueRefT value, BlockT *predBlock)
407 : value(value), predBlock(predBlock) {}
408 };
409
410 const ContextT &Context;
411 const FunctionT &F;
412 const CycleInfoT &CI;
413 const TargetTransformInfo *TTI = nullptr;
414
415 // Detected/marked divergent values.
416 std::set<ConstValueRefT> DivergentValues;
417 SmallPtrSet<const BlockT *, 32> DivergentTermBlocks;
418
419 // Internal worklist for divergence propagation.
420 std::vector<const InstructionT *> Worklist;
421
422 /// \brief Mark \p Term as divergent and push all Instructions that become
423 /// divergent as a result on the worklist.
424 void analyzeControlDivergence(const InstructionT &Term);
425
426private:
427 const DominatorTreeT &DT;
428
429 // Recognized cycles with divergent exits.
430 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
431
432 // Cycles assumed to be divergent.
433 //
434 // We don't use a set here: every insertion already requires an explicit
435 // containment check against all existing members, so a vector suffices.
436 SmallVector<const CycleT *> AssumedDivergent;
437
438 // The SDA links divergent branches to divergent control-flow joins.
439 SyncDependenceAnalysisT SDA;
440
441 // Set of known-uniform values.
442 SmallPtrSet<const InstructionT *, 32> UniformOverrides;
443
444 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
445 /// the worklist.
446 void taintAndPushAllDefs(const BlockT &JoinBlock);
447
448 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
449 /// the worklist.
450 void taintAndPushPhiNodes(const BlockT &JoinBlock);
451
452 /// \brief Identify all Instructions that become divergent because \p DivExit
453 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
454 /// divergent and push them on the worklist.
455 void propagateCycleExitDivergence(const BlockT &DivExit,
456 const CycleT &DivCycle);
457
458 /// Mark as divergent all external uses of values defined in \p DefCycle.
459 void analyzeCycleExitDivergence(const CycleT &DefCycle);
460
461 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
462 void propagateTemporalDivergence(const InstructionT &I,
463 const CycleT &DefCycle);
464
465 /// \brief Push all users of \p Val (in the region) to the worklist.
466 void pushUsers(const InstructionT &I);
467 void pushUsers(ConstValueRefT V);
468
469 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
470
471 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
472 bool isTemporalDivergent(const BlockT &ObservingBlock,
473 const InstructionT &Def) const;
474};
475
476template <typename ImplT>
477void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {
478 delete Impl;
479}
480
481/// Compute divergence starting with a divergent branch.
482template <typename ContextT> class DivergencePropagator {
483public:
484 using BlockT = typename ContextT::BlockT;
485 using DominatorTreeT = typename ContextT::DominatorTreeT;
486 using FunctionT = typename ContextT::FunctionT;
487 using ValueRefT = typename ContextT::ValueRefT;
488
489 using CycleInfoT = GenericCycleInfo<ContextT>;
490 using CycleT = typename CycleInfoT::CycleT;
491
492 using ModifiedPO = ModifiedPostOrder<ContextT>;
493 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
494 using DivergenceDescriptorT =
495 typename SyncDependenceAnalysisT::DivergenceDescriptor;
496 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
497
498 const ModifiedPO &CyclePOT;
499 const DominatorTreeT &DT;
500 const CycleInfoT &CI;
501 const BlockT &DivTermBlock;
502 const ContextT &Context;
503
504 // Track blocks that receive a new label. Every time we relabel a
505 // cycle header, we make another pass over the modified post-order in
506 // order to propagate the header label. The bit vector also allows
507 // us to skip labels that have not changed.
508 SparseBitVector<> FreshLabels;
509
510 // divergent join and cycle exit descriptor.
511 std::unique_ptr<DivergenceDescriptorT> DivDesc;
512 BlockLabelMapT &BlockLabels;
513
514 DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT,
515 const CycleInfoT &CI, const BlockT &DivTermBlock)
516 : CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock),
517 Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
518 BlockLabels(DivDesc->BlockLabels) {}
519
520 void printDefs(raw_ostream &Out) {
521 Out << "Propagator::BlockLabels {\n";
522 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
523 const auto *Block = CyclePOT[BlockIdx];
524 const auto *Label = BlockLabels[Block];
525 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
526 if (!Label) {
527 Out << "<null>\n";
528 } else {
529 Out << Context.print(Label) << "\n";
530 }
531 }
532 Out << "}\n";
533 }
534
535 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
536 // causes a divergent join.
537 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
538 const auto *OldLabel = BlockLabels[&SuccBlock];
539
540 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
541 << "\tpushed label: " << Context.print(&PushedLabel)
542 << "\n"
543 << "\told label: " << Context.print(OldLabel) << "\n");
544
545 // Early exit if there is no change in the label.
546 if (OldLabel == &PushedLabel)
547 return false;
548
549 if (OldLabel != &SuccBlock) {
550 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
551 // Assigning a new label, mark this in FreshLabels.
552 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
553 FreshLabels.set(SuccIdx);
554 }
555
556 // This is not a join if the succ was previously unlabeled.
557 if (!OldLabel) {
558 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
559 << "\n");
560 BlockLabels[&SuccBlock] = &PushedLabel;
561 return false;
562 }
563
564 // This is a new join. Label the join block as itself, and not as
565 // the pushed label.
566 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
567 BlockLabels[&SuccBlock] = &SuccBlock;
568
569 return true;
570 }
571
572 // visiting a virtual cycle exit edge from the cycle header --> temporal
573 // divergence on join
574 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
575 if (!computeJoin(ExitBlock, Label))
576 return false;
577
578 // Identified a divergent cycle exit
579 DivDesc->CycleDivBlocks.insert(&ExitBlock);
580 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
581 << "\n");
582 return true;
583 }
584
585 // process \p SuccBlock with reaching definition \p Label
586 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
587 if (!computeJoin(SuccBlock, Label))
588 return false;
589
590 // Divergent, disjoint paths join.
591 DivDesc->JoinDivBlocks.insert(&SuccBlock);
592 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
593 << "\n");
594 return true;
595 }
596
597 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
598 assert(DivDesc);
599
600 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
601 << Context.print(&DivTermBlock) << "\n");
602
603 // Early stopping criterion
604 int FloorIdx = CyclePOT.size() - 1;
605 const BlockT *FloorLabel = nullptr;
606 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
607
608 // Bootstrap with branch targets
609 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
610 for (const auto *SuccBlock : successors(&DivTermBlock)) {
611 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
612 // If DivTerm exits the cycle immediately, computeJoin() might
613 // not reach SuccBlock with a different label. We need to
614 // check for this exit now.
615 DivDesc->CycleDivBlocks.insert(SuccBlock);
616 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
617 << Context.print(SuccBlock) << "\n");
618 }
619 auto SuccIdx = CyclePOT.getIndex(SuccBlock);
620 visitEdge(*SuccBlock, *SuccBlock);
621 FloorIdx = std::min<int>(FloorIdx, SuccIdx);
622 }
623
624 while (true) {
625 auto BlockIdx = FreshLabels.find_last();
626 if (BlockIdx == -1 || BlockIdx < FloorIdx)
627 break;
628
629 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
630
631 FreshLabels.reset(BlockIdx);
632 if (BlockIdx == DivTermIdx) {
633 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
634 continue;
635 }
636
637 const auto *Block = CyclePOT[BlockIdx];
638 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
639 << BlockIdx << "\n");
640
641 const auto *Label = BlockLabels[Block];
642 assert(Label);
643
644 bool CausedJoin = false;
645 int LoweredFloorIdx = FloorIdx;
646
647 // If the current block is the header of a reducible cycle that
648 // contains the divergent branch, then the label should be
649 // propagated to the cycle exits. Such a header is the "last
650 // possible join" of any disjoint paths within this cycle. This
651 // prevents detection of spurious joins at the entries of any
652 // irreducible child cycles.
653 //
654 // This conclusion about the header is true for any choice of DFS:
655 //
656 // If some DFS has a reducible cycle C with header H, then for
657 // any other DFS, H is the header of a cycle C' that is a
658 // superset of C. For a divergent branch inside the subgraph
659 // C, any join node inside C is either H, or some node
660 // encountered without passing through H.
661 //
662 auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
663 if (!CyclePOT.isReducibleCycleHeader(Block))
664 return nullptr;
665 const auto *BlockCycle = CI.getCycle(Block);
666 if (BlockCycle->contains(&DivTermBlock))
667 return BlockCycle;
668 return nullptr;
669 };
670
671 if (const auto *BlockCycle = getReducibleParent(Block)) {
672 SmallVector<BlockT *, 4> BlockCycleExits;
673 BlockCycle->getExitBlocks(BlockCycleExits);
674 for (auto *BlockCycleExit : BlockCycleExits) {
675 CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label);
676 LoweredFloorIdx =
677 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit));
678 }
679 } else {
680 for (const auto *SuccBlock : successors(Block)) {
681 CausedJoin |= visitEdge(*SuccBlock, *Label);
682 LoweredFloorIdx =
683 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock));
684 }
685 }
686
687 // Floor update
688 if (CausedJoin) {
689 // 1. Different labels pushed to successors
690 FloorIdx = LoweredFloorIdx;
691 } else if (FloorLabel != Label) {
692 // 2. No join caused BUT we pushed a label that is different than the
693 // last pushed label
694 FloorIdx = LoweredFloorIdx;
695 FloorLabel = Label;
696 }
697 }
698
699 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
700
701 // Check every cycle containing DivTermBlock for exit divergence.
702 // A cycle has exit divergence if the label of an exit block does
703 // not match the label of its header.
704 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
705 Cycle = Cycle->getParentCycle()) {
706 if (Cycle->isReducible()) {
707 // The exit divergence of a reducible cycle is recorded while
708 // propagating labels.
709 continue;
710 }
711 SmallVector<BlockT *> Exits;
712 Cycle->getExitBlocks(Exits);
713 auto *Header = Cycle->getHeader();
714 auto *HeaderLabel = BlockLabels[Header];
715 for (const auto *Exit : Exits) {
716 if (BlockLabels[Exit] != HeaderLabel) {
717 // Identified a divergent cycle exit
718 DivDesc->CycleDivBlocks.insert(Exit);
719 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
720 << "\n");
721 }
722 }
723 }
724
725 return std::move(DivDesc);
726 }
727};
728
729template <typename ContextT>
730typename llvm::GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
731 llvm::GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
732
733template <typename ContextT>
734llvm::GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
735 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
736 : CyclePO(Context), DT(DT), CI(CI) {
737 CyclePO.compute(CI);
738}
739
740template <typename ContextT>
741auto llvm::GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
742 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
743 // trivial case
744 if (succ_size(DivTermBlock) <= 1) {
745 return EmptyDivergenceDesc;
746 }
747
748 // already available in cache?
749 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
750 if (ItCached != CachedControlDivDescs.end())
751 return *ItCached->second;
752
753 // compute all join points
754 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
755 auto DivDesc = Propagator.computeJoinPoints();
756
757 auto printBlockSet = [&](ConstBlockSet &Blocks) {
758 return Printable([&](raw_ostream &Out) {
759 Out << "[";
760 ListSeparator LS;
761 for (const auto *BB : Blocks) {
762 Out << LS << CI.getSSAContext().print(BB);
763 }
764 Out << "]\n";
765 });
766 };
767
768 LLVM_DEBUG(
769 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
770 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
771 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
772 << "\n");
773 (void)printBlockSet;
774
775 auto ItInserted =
776 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
777 assert(ItInserted.second);
778 return *ItInserted.first->second;
779}
780
781template <typename ContextT>
782void GenericUniformityAnalysisImpl<ContextT>::markDivergent(
783 const InstructionT &I) {
784 if (isAlwaysUniform(I))
785 return;
786 bool Marked = false;
787 if (I.isTerminator()) {
788 Marked = DivergentTermBlocks.insert(I.getParent()).second;
789 if (Marked) {
790 LLVM_DEBUG(dbgs() << "marked divergent term block: "
791 << Context.print(I.getParent()) << "\n");
792 }
793 } else {
794 Marked = markDefsDivergent(I);
795 }
796
797 if (Marked)
798 Worklist.push_back(&I);
799}
800
801template <typename ContextT>
802bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
803 ConstValueRefT Val) {
804 if (DivergentValues.insert(Val).second) {
805 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
806 return true;
807 }
808 return false;
809}
810
811template <typename ContextT>
812void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
813 const InstructionT &Instr) {
814 UniformOverrides.insert(&Instr);
815}
816
817// Mark as divergent all external uses of values defined in \p DefCycle.
818//
819// A value V defined by a block B inside \p DefCycle may be used outside the
820// cycle only if the use is a PHI in some exit block, or B dominates some exit
821// block. Thus, we check uses as follows:
822//
823// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
824// - For every block B inside \p DefCycle that dominates at least one exit
825// block, check all uses outside \p DefCycle.
826//
827// FIXME: This function does not distinguish between divergent and uniform
828// exits. For each divergent exit, only the values that are live at that exit
829// need to be propagated as divergent at their use outside the cycle.
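//
// As a concrete but hypothetical IR sketch of the temporal-divergence case
// handled here: %v is uniform within any single iteration, but if the exit
// branch is divergent, different threads leave the loop in different
// iterations and therefore observe different values of %v, so its uses
// outside the cycle (such as the phi in %exit) must be marked divergent:
//
//   loop:
//     %v = phi i32 [ 0, %entry ], [ %v.next, %loop ]
//     %v.next = add i32 %v, 1
//     %exit.cond = icmp eq i32 %v.next, %tid      ; divergent exit condition
//     br i1 %exit.cond, label %exit, label %loop
//   exit:
//     %use = phi i32 [ %v.next, %loop ]           ; divergent outside the loop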
830template <typename ContextT>
831void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
832 const CycleT &DefCycle) {
833 SmallVector<BlockT *> Exits;
834 DefCycle.getExitBlocks(Exits);
835 for (auto *Exit : Exits) {
836 for (auto &Phi : Exit->phis()) {
837 if (usesValueFromCycle(Phi, DefCycle)) {
838 markDivergent(Phi);
839 }
840 }
841 }
842
843 for (auto *BB : DefCycle.blocks()) {
844 if (!llvm::any_of(Exits,
845 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
846 continue;
847 for (auto &II : *BB) {
848 propagateTemporalDivergence(II, DefCycle);
849 }
850 }
851}
852
853template <typename ContextT>
854void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
855 const BlockT &DivExit, const CycleT &InnerDivCycle) {
856 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
857 << "\n");
858 auto *DivCycle = &InnerDivCycle;
859 auto *OuterDivCycle = DivCycle;
860 auto *ExitLevelCycle = CI.getCycle(&DivExit);
861 const unsigned CycleExitDepth =
862 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
863
864 // Find outer-most cycle that does not contain \p DivExit
865 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
866 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
867 << Context.print(DivCycle->getHeader()) << "\n");
868 OuterDivCycle = DivCycle;
869 DivCycle = DivCycle->getParentCycle();
870 }
871 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
872 << Context.print(OuterDivCycle->getHeader()) << "\n");
873
874 if (!DivergentExitCycles.insert(OuterDivCycle).second)
875 return;
876
877 // Exit divergence does not matter if the cycle itself is assumed to
878 // be divergent.
879 for (const auto *C : AssumedDivergent) {
880 if (C->contains(OuterDivCycle))
881 return;
882 }
883
884 analyzeCycleExitDivergence(*OuterDivCycle);
885}
886
887template <typename ContextT>
888void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
889 const BlockT &BB) {
890 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
891 for (const auto &I : instrs(BB)) {
892 // Terminators do not produce values; they are divergent only if
893 // the condition is divergent. That is handled when the divergent
894 // condition is placed in the worklist.
895 if (I.isTerminator())
896 break;
897
898 markDivergent(I);
899 }
900}
901
902/// Mark divergent phi nodes in a join block
903template <typename ContextT>
904void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
905 const BlockT &JoinBlock) {
906 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
907 << "\n");
908 for (const auto &Phi : JoinBlock.phis()) {
909 // FIXME: The non-undef value is not constant per se; it just happens to be
910 // uniform and may not dominate this PHI. So assuming that the same value
911 // reaches along all incoming edges may itself be undefined behaviour. This
912 // particular interpretation of the undef value was added to
913 // DivergenceAnalysis in the following review:
914 //
915 // https://reviews.llvm.org/D19013
916 if (ContextT::isConstantOrUndefValuePhi(Phi))
917 continue;
918 markDivergent(Phi);
919 }
920}
921
922/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
923///
924/// \return true iff \p Candidate was added to \p Cycles.
925template <typename CycleT>
926static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
927 CycleT *Candidate) {
928 if (llvm::any_of(Cycles,
929 [Candidate](CycleT *C) { return C->contains(Candidate); }))
930 return false;
931 Cycles.push_back(Candidate);
932 return true;
933}
934
935/// Return the outermost cycle made divergent by branch outside it.
936///
937/// If two paths that diverged outside an irreducible cycle join
938/// inside that cycle, then that whole cycle is assumed to be
939/// divergent. This does not apply if the cycle is reducible.
940template <typename CycleT, typename BlockT>
941static const CycleT *getExtDivCycle(const CycleT *Cycle,
942 const BlockT *DivTermBlock,
943 const BlockT *JoinBlock) {
944 assert(Cycle);
945 assert(Cycle->contains(JoinBlock));
946
947 if (Cycle->contains(DivTermBlock))
948 return nullptr;
949
950 const auto *OriginalCycle = Cycle;
951 const auto *Parent = Cycle->getParentCycle();
952 while (Parent && !Parent->contains(DivTermBlock)) {
953 Cycle = Parent;
954 Parent = Cycle->getParentCycle();
955 }
956
957 // If the original cycle is not the outermost cycle, then the outermost cycle
958 // is irreducible. If the outermost cycle were reducible, then external
959 // diverged paths would not reach the original inner cycle.
960 (void)OriginalCycle;
961 assert(Cycle == OriginalCycle || !Cycle->isReducible());
962
963 if (Cycle->isReducible()) {
964 assert(Cycle->getHeader() == JoinBlock);
965 return nullptr;
966 }
967
968 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
969 return Cycle;
970}
971
972/// Return the outermost cycle made divergent by branch inside it.
973///
974/// This checks the "diverged entry" criterion defined in the
975/// docs/ConvergenceAnalysis.html.
976template <typename ContextT, typename CycleT, typename BlockT,
977 typename DominatorTreeT>
978static const CycleT *
979getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
980 const BlockT *JoinBlock, const DominatorTreeT &DT,
981 ContextT &Context) {
982 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
983 << " for internal branch " << Context.print(DivTermBlock)
984 << "\n");
985 if (DT.properlyDominates(DivTermBlock, JoinBlock))
986 return nullptr;
987
988 // Find the smallest common cycle, if one exists.
989 assert(Cycle && Cycle->contains(JoinBlock));
990 while (Cycle && !Cycle->contains(DivTermBlock)) {
991 Cycle = Cycle->getParentCycle();
992 }
993 if (!Cycle || Cycle->isReducible())
994 return nullptr;
995
996 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
997 return nullptr;
998
999 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1000 << " does not dominate join\n");
1001
1002 const auto *Parent = Cycle->getParentCycle();
1003 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1004 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1005 << " does not dominate join\n");
1006 Cycle = Parent;
1007 Parent = Parent->getParentCycle();
1008 }
1009
1010 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1011 return Cycle;
1012}
1013
1014template <typename ContextT, typename CycleT, typename BlockT,
1015 typename DominatorTreeT>
1016static const CycleT *
1017getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1018 const BlockT *JoinBlock, const DominatorTreeT &DT,
1019 ContextT &Context) {
1020 if (!Cycle)
1021 return nullptr;
1022
1023 // First try to expand Cycle to the largest that contains JoinBlock
1024 // but not DivTermBlock.
1025 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1026
1027 // Continue expanding to the largest cycle that contains both.
1028 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1029
1030 if (Int)
1031 return Int;
1032 return Ext;
1033}
1034
1035template <typename ContextT>
1036bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1037 const BlockT &ObservingBlock, const InstructionT &Def) const {
1038 const BlockT *DefBlock = Def.getParent();
1039 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1040 Cycle && !Cycle->contains(&ObservingBlock);
1041 Cycle = Cycle->getParentCycle()) {
1042 if (DivergentExitCycles.contains(Cycle)) {
1043 return true;
1044 }
1045 }
1046 return false;
1047}
1048
1049template <typename ContextT>
1050void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
1051 const InstructionT &Term) {
1052 const auto *DivTermBlock = Term.getParent();
1053 DivergentTermBlocks.insert(DivTermBlock);
1054 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1055 << "\n");
1056
1057 // Don't propagate divergence from unreachable blocks.
1058 if (!DT.isReachableFromEntry(DivTermBlock))
1059 return;
1060
1061 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1062 SmallVector<const CycleT *> DivCycles;
1063
1064 // Iterate over all join blocks reached by disjoint paths from the divergent branch.
1065 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1066 const auto *Cycle = CI.getCycle(JoinBlock);
1067 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1068 << "\n");
1069 if (const auto *Outermost = getOutermostDivergentCycle(
1070 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1071 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1072 DivCycles.push_back(Outermost);
1073 continue;
1074 }
1075 taintAndPushPhiNodes(*JoinBlock);
1076 }
1077
1078 // Sort by order of decreasing depth. This allows later cycles to be skipped
1079 // because they are already contained in earlier ones.
1080 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1081 return A->getDepth() > B->getDepth();
1082 });
1083
1084 // Cycles that are assumed divergent due to the diverged entry
1085 // criterion potentially contain temporal divergence depending on
1086 // the DFS chosen. Conservatively, all values produced in such a
1087 // cycle are assumed divergent. "Cycle invariant" values may be
1088 // assumed uniform, but that requires further analysis.
1089 for (auto *C : DivCycles) {
1090 if (!insertIfNotContained(AssumedDivergent, C))
1091 continue;
1092 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1093 for (const BlockT *BB : C->blocks()) {
1094 taintAndPushAllDefs(*BB);
1095 }
1096 }
1097
1098 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1099 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1100 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1101 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1102 }
1103}
1104
1105template <typename ContextT>
1106void GenericUniformityAnalysisImpl<ContextT>::compute() {
1107 // Initialize worklist.
1108 auto DivValuesCopy = DivergentValues;
1109 for (const auto DivVal : DivValuesCopy) {
1110 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1111 pushUsers(DivVal);
1112 }
1113
1114 // All values on the Worklist are divergent.
1115 // Their users may not have been updated yet.
1116 while (!Worklist.empty()) {
1117 const InstructionT *I = Worklist.back();
1118 Worklist.pop_back();
1119
1120 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1121
1122 if (I->isTerminator()) {
1123 analyzeControlDivergence(*I);
1124 continue;
1125 }
1126
1127 // propagate value divergence to users
1128 assert(isDivergent(*I) && "Worklist invariant violated!");
1129 pushUsers(*I);
1130 }
1131}
1132
1133template <typename ContextT>
1134bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
1135 const InstructionT &Instr) const {
1136 return UniformOverrides.contains(&Instr);
1137}
1138
1139template <typename ContextT>
1140GenericUniformityInfo<ContextT>::GenericUniformityInfo(
1141 const DominatorTreeT &DT, const CycleInfoT &CI,
1142 const TargetTransformInfo *TTI) {
1143 DA.reset(new ImplT{DT, CI, TTI});
1144}
1145
1146template <typename ContextT>
1147void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
1148 bool haveDivergentArgs = false;
1149
1150 // Control flow instructions may be divergent even if their inputs are
1151 // uniform. Thus, although exceedingly rare, it is possible to have a program
1152 // with no divergent values but with divergent control structures.
1153 if (DivergentValues.empty() && DivergentTermBlocks.empty() &&
1154 DivergentExitCycles.empty()) {
1155 OS << "ALL VALUES UNIFORM\n";
1156 return;
1157 }
1158
1159 for (const auto &entry : DivergentValues) {
1160 const BlockT *parent = Context.getDefBlock(entry);
1161 if (!parent) {
1162 if (!haveDivergentArgs) {
1163 OS << "DIVERGENT ARGUMENTS:\n";
1164 haveDivergentArgs = true;
1165 }
1166 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1167 }
1168 }
1169
1170 if (!AssumedDivergent.empty()) {
1171 OS << "CYCLES ASSSUMED DIVERGENT:\n";
1172 for (const CycleT *cycle : AssumedDivergent) {
1173 OS << " " << cycle->print(Context) << '\n';
1174 }
1175 }
1176
1177 if (!DivergentExitCycles.empty()) {
1178 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1179 for (const CycleT *cycle : DivergentExitCycles) {
1180 OS << " " << cycle->print(Context) << '\n';
1181 }
1182 }
1183
1184 for (auto &block : F) {
1185 OS << "\nBLOCK " << Context.print(&block) << '\n';
1186
1187 OS << "DEFINITIONS\n";
1188 SmallVector<ConstValueRefT, 16> defs;
1189 Context.appendBlockDefs(defs, block);
1190 for (auto value : defs) {
1191 if (isDivergent(value))
1192 OS << " DIVERGENT: ";
1193 else
1194 OS << " ";
1195 OS << Context.print(value) << '\n';
1196 }
1197
1198 OS << "TERMINATORS\n";
1199 SmallVector<const InstructionT *, 8> terms;
1200 Context.appendBlockTerms(terms, block);
1201 bool divergentTerminators = hasDivergentTerminator(block);
1202 for (auto *T : terms) {
1203 if (divergentTerminators)
1204 OS << " DIVERGENT: ";
1205 else
1206 OS << " ";
1207 OS << Context.print(T) << '\n';
1208 }
1209
1210 OS << "END BLOCK\n";
1211 }
1212}
1213
1214template <typename ContextT>
1215bool GenericUniformityInfo<ContextT>::hasDivergence() const {
1216 return DA->hasDivergence();
1217}
1218
1219template <typename ContextT>
1220const typename ContextT::FunctionT &
1221GenericUniformityInfo<ContextT>::getFunction() const {
1222 return DA->getFunction();
1223}
1224
1225/// Whether \p V is divergent at its definition.
1226template <typename ContextT>
1227bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
1228 return DA->isDivergent(V);
1229}
1230
1231template <typename ContextT>
1232bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
1233 return DA->isDivergent(*I);
1234}
1235
1236template <typename ContextT>
1237bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
1238 return DA->isDivergentUse(U);
1239}
1240
1241template <typename ContextT>
1242bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
1243 return DA->hasDivergentTerminator(B);
1244}
1245
1246/// \brief Helper function for printing.
1247template <typename ContextT>
1248void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {
1249 DA->print(out);
1250}
1251
1252template <typename ContextT>
1253void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1254 SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
1255 const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
1256 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1257 while (!Stack.empty()) {
1258 auto *NextBB = Stack.back();
1259 if (Finalized.count(NextBB)) {
1260 Stack.pop_back();
1261 continue;
1262 }
1263 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1264 << "\n");
1265 auto *NestedCycle = CI.getCycle(NextBB);
1266 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1267 LLVM_DEBUG(dbgs() << " found a cycle\n");
1268 while (NestedCycle->getParentCycle() != Cycle)
1269 NestedCycle = NestedCycle->getParentCycle();
1270
1271 SmallVector<BlockT *, 3> NestedExits;
1272 NestedCycle->getExitBlocks(NestedExits);
1273 bool PushedNodes = false;
1274 for (auto *NestedExitBB : NestedExits) {
1275 LLVM_DEBUG(dbgs() << " examine exit: "
1276 << CI.getSSAContext().print(NestedExitBB) << "\n");
1277 if (Cycle && !Cycle->contains(NestedExitBB))
1278 continue;
1279 if (Finalized.count(NestedExitBB))
1280 continue;
1281 PushedNodes = true;
1282 Stack.push_back(NestedExitBB);
1283 LLVM_DEBUG(dbgs() << " pushed exit: "
1284 << CI.getSSAContext().print(NestedExitBB) << "\n");
1285 }
1286 if (!PushedNodes) {
1287 // All loop exits finalized -> finish this node
1288 Stack.pop_back();
1289 computeCyclePO(CI, NestedCycle, Finalized);
1290 }
1291 continue;
1292 }
1293
1294 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1295 // DAG-style
1296 bool PushedNodes = false;
1297 for (auto *SuccBB : successors(NextBB)) {
1298 LLVM_DEBUG(dbgs() << " examine succ: "
1299 << CI.getSSAContext().print(SuccBB) << "\n");
1300 if (Cycle && !Cycle->contains(SuccBB))
1301 continue;
1302 if (Finalized.count(SuccBB))
1303 continue;
1304 PushedNodes = true;
1305 Stack.push_back(SuccBB);
1306 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1307 << "\n");
1308 }
1309 if (!PushedNodes) {
1310 // Never push nodes twice
1311 LLVM_DEBUG(dbgs() << " finishing node: "
1312 << CI.getSSAContext().print(NextBB) << "\n");
1313 Stack.pop_back();
1314 Finalized.insert(NextBB);
1315 appendBlock(*NextBB);
1316 }
1317 }
1318 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1319}
1320
1321template <typename ContextT>
1322void ModifiedPostOrder<ContextT>::computeCyclePO(
1323 const CycleInfoT &CI, const CycleT *Cycle,
1324 SmallPtrSetImpl<const BlockT *> &Finalized) {
1325 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1326 SmallVector<const BlockT *> Stack;
1327 auto *CycleHeader = Cycle->getHeader();
1328
1329 LLVM_DEBUG(dbgs() << " noted header: "
1330 << CI.getSSAContext().print(CycleHeader) << "\n");
1331 assert(!Finalized.count(CycleHeader));
1332 Finalized.insert(CycleHeader);
1333
1334 // Visit the header last
1335 LLVM_DEBUG(dbgs() << " finishing header: "
1336 << CI.getSSAContext().print(CycleHeader) << "\n");
1337 appendBlock(*CycleHeader, Cycle->isReducible());
1338
1339 // Initialize with immediate successors
1340 for (auto *BB : successors(CycleHeader)) {
1341 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1342 << "\n");
1343 if (!Cycle->contains(BB))
1344 continue;
1345 if (BB == CycleHeader)
1346 continue;
1347 if (!Finalized.count(BB)) {
1348 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1349 << "\n");
1350 Stack.push_back(BB);
1351 }
1352 }
1353
1354 // Compute PO inside region
1355 computeStackPO(Stack, CI, Cycle, Finalized);
1356
1357 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1358}
1359
1360/// \brief Generically compute the modified post order.
1361template <typename ContextT>
1362void llvm::ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {
1363 SmallPtrSet<const BlockT *, 32> Finalized;
1364 SmallVector<const BlockT *> Stack;
1365 auto *F = CI.getFunction();
1366 Stack.reserve(24); // FIXME made-up number
1367 Stack.push_back(&F->front());
1368 computeStackPO(Stack, CI, nullptr, Finalized);
1369}
1370
1371} // namespace llvm
1372
1373#undef DEBUG_TYPE
1374
1375#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
1376
