1 | //===- CFGToSCF.h - Control Flow Graph to Structured Control Flow *- C++ -*===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This code is an implementation of: |
10 | // Helge Bahmann, Nico Reissmann, Magnus Jahre, and Jan Christian Meyer. 2015. |
11 | // Perfect Reconstructability of Control Flow from Demand Dependence Graphs. ACM |
12 | // Trans. Archit. Code Optim. 11, 4, Article 66 (January 2015), 25 pages. |
13 | // https://doi.org/10.1145/2693261 |
14 | // |
15 | // It defines an algorithm to translate any control flow graph with a single |
16 | // entry and single exit block into structured control flow operations |
17 | // consisting of regions of do-while loops and operations conditionally |
18 | // dispatching to one out of multiple regions before continuing after the |
19 | // operation. This includes control flow graphs containing irreducible |
20 | // control flow. |
21 | // |
22 | // The implementation here additionally supports the transformation on |
23 | // regions with multiple exit blocks. This is implemented by first |
24 | // transforming all occurrences of return-like operations to branch to a |
25 | // single exit block containing an instance of that return-like operation. |
26 | // If there are multiple kinds of return-like operations, multiple exit |
27 | // blocks are created. In that case the transformation leaves behind a |
28 | // conditional control flow graph operation that dispatches to the given regions |
29 | // terminating with different kinds of return-like operations each. |
30 | // |
31 | // If the function only contains a single kind of return-like operations, |
32 | // it is guaranteed that all control flow graph ops will be lifted to structured |
33 | // control flow, and that no more control flow graph ops remain after the |
34 | // operation. |
35 | // |
36 | // The algorithm to lift CFGs consists of two transformations applied after each |
37 | // other on any single-entry, single-exit region: |
38 | // 1) Lifting cycles to structured control flow loops |
39 | // 2) Lifting conditional branches to structured control flow branches |
40 | // These are then applied recursively on any new single-entry single-exit |
41 | // regions created by the transformation until no more CFG operations remain. |
42 | // |
43 | // The first part of cycle lifting is to detect any cycles in the CFG. |
44 | // This is done using an algorithm for iterating over SCCs. Every SCC |
45 | // representing a cycle is then transformed into a structured loop with a single |
46 | // entry block and a single latch containing the only back edge to the entry |
47 | // block and the only edge to an exit block outside the loop. Rerouting control |
48 | // flow to create single entry and exit blocks is achieved via a multiplexer |
49 | // construct that can be visualized as follows: |
50 | // +-----+ +-----+ +-----+ |
51 | // | bb0 | | bb1 |...| bbN | |
52 | // +--+--+ +--+--+ +-+---+ |
53 | // | | | |
54 | // | v | |
55 | // | +------+ | |
56 | // | ++ ++<----+ |
57 | // | | Region | |
58 | // +>| |<----+ |
59 | // ++ ++ | |
60 | // +------+------+ |
61 | // |
62 | // The above transforms to: |
63 | // +-----+ +-----+ +-----+ |
64 | // | bb0 | | bb1 |...| bbN | |
65 | // +-----+ +--|--+ ++----+ |
66 | // | v | |
67 | // +->+-----+<---+ |
68 | // | bbM |<-------+ |
69 | // +---+-+ | |
70 | // +---+ | +----+ | |
71 | // | v | | |
72 | // | +------+ | | |
73 | // | ++ ++<-+ | |
74 | // +->| Region | | |
75 | // ++ ++ | |
76 | // +------+-------+ |
77 | // |
78 | // bbM in the above is the multiplexer block, and any block previously branching |
79 | // to an entry block of the region are redirected to it. This includes any |
80 | // branches from within the region. Using a block argument, bbM then dispatches |
81 | // to the correct entry block of the region dependent on the predecessor. |
82 | // |
83 | // A similar transformation is done to create the latch block with the single |
84 | // back edge and loop exit edge. |
85 | // |
86 | // The above form has the advantage that bbM now acts as the loop header |
87 | // of the loop body. After the transformation on the latch, this results in a |
88 | // structured loop that can then be lifted to structured control flow. The |
89 | // conditional branches created in bbM are later lifted to conditional |
90 | // branches. |
91 | // |
92 | // Lifting conditional branches is done by analyzing the *first* conditional |
93 | // branch encountered in the entry region. The algorithm then identifies |
94 | // all blocks that are dominated by a specific control flow edge and |
95 | // the region where control flow continues: |
96 | // +-----+ |
97 | // +-----+ bb0 +----+ |
98 | // v +-----+ v |
99 | // Region 1 +-+-+ ... +-+-+ Region n |
100 | // +---+ +---+ |
101 | // ... ... |
102 | // | | |
103 | // | +---+ | |
104 | // +---->++ ++<---+ |
105 | // | | |
106 | // ++ ++ Region T |
107 | // +---+ |
108 | // Every region following bb0 consists of 0 or more blocks that eventually |
109 | // branch to Region T. If there are multiple entry blocks into Region T, a |
110 | // single entry block is created using a multiplexer block as shown above. |
111 | // Region 1 to Region n are then lifted together with the conditional control |
112 | // flow operation terminating bb0 into a structured conditional operation |
113 | // followed by the operations of the entry block of Region T. |
114 | //===----------------------------------------------------------------------===// |
115 | |
116 | #include "mlir/Transforms/CFGToSCF.h" |
117 | |
118 | #include "mlir/IR/RegionGraphTraits.h" |
119 | #include "mlir/Interfaces/ControlFlowInterfaces.h" |
120 | #include "mlir/Interfaces/SideEffectInterfaces.h" |
121 | #include "llvm/ADT/DepthFirstIterator.h" |
122 | #include "llvm/ADT/MapVector.h" |
123 | #include "llvm/ADT/SCCIterator.h" |
124 | #include "llvm/ADT/SetVector.h" |
125 | #include "llvm/ADT/SmallPtrSet.h" |
126 | |
127 | using namespace mlir; |
128 | |
129 | /// Returns the mutable operand range used to transfer operands from `block` to |
130 | /// its successor with the given index. The returned range being mutable allows |
131 | /// us to modify the operands being transferred. |
132 | static MutableOperandRange |
133 | getMutableSuccessorOperands(Block *block, unsigned successorIndex) { |
134 | auto branchOpInterface = cast<BranchOpInterface>(block->getTerminator()); |
135 | SuccessorOperands succOps = |
136 | branchOpInterface.getSuccessorOperands(successorIndex); |
137 | return succOps.getMutableForwardedOperands(); |
138 | } |
139 | |
140 | /// Return the operand range used to transfer operands from `block` to its |
141 | /// successor with the given index. |
142 | static OperandRange getSuccessorOperands(Block *block, |
143 | unsigned successorIndex) { |
144 | return getMutableSuccessorOperands(block, successorIndex); |
145 | } |
146 | |
147 | /// Appends all the block arguments from `other` to the block arguments of |
148 | /// `block`, copying their types and locations. |
149 | static void addBlockArgumentsFromOther(Block *block, Block *other) { |
150 | for (BlockArgument arg : other->getArguments()) |
151 | block->addArgument(type: arg.getType(), loc: arg.getLoc()); |
152 | } |
153 | |
154 | namespace { |
155 | |
156 | /// Class representing an edge in the CFG. Consists of a from-block, a successor |
157 | /// and corresponding successor operands passed to the block arguments of the |
158 | /// successor. |
159 | class Edge { |
160 | Block *fromBlock; |
161 | unsigned successorIndex; |
162 | |
163 | public: |
164 | /// Constructs a new edge from `fromBlock` to the successor corresponding to |
165 | /// `successorIndex`. |
166 | Edge(Block *fromBlock, unsigned int successorIndex) |
167 | : fromBlock(fromBlock), successorIndex(successorIndex) {} |
168 | |
169 | /// Returns the from-block. |
170 | Block *getFromBlock() const { return fromBlock; } |
171 | |
172 | /// Returns the successor of the edge. |
173 | Block *getSuccessor() const { |
174 | return fromBlock->getSuccessor(i: successorIndex); |
175 | } |
176 | |
177 | /// Sets the successor of the edge, adjusting the terminator in the |
178 | /// from-block. |
179 | void setSuccessor(Block *block) const { |
180 | fromBlock->getTerminator()->setSuccessor(block, index: successorIndex); |
181 | } |
182 | |
183 | /// Returns the arguments of this edge that are passed to the block arguments |
184 | /// of the successor. |
185 | MutableOperandRange getMutableSuccessorOperands() const { |
186 | return ::getMutableSuccessorOperands(block: fromBlock, successorIndex); |
187 | } |
188 | |
189 | /// Returns the arguments of this edge that are passed to the block arguments |
190 | /// of the successor. |
191 | OperandRange getSuccessorOperands() const { |
192 | return ::getSuccessorOperands(block: fromBlock, successorIndex); |
193 | } |
194 | }; |
195 | |
196 | /// Structure containing the entry, exit and back edges of a cycle. A cycle is a |
197 | /// generalization of a loop that may have multiple entry edges. See also |
198 | /// https://llvm.org/docs/CycleTerminology.html. |
199 | struct CycleEdges { |
200 | /// All edges from a block outside the cycle to a block inside the cycle. |
201 | /// The targets of these edges are entry blocks. |
202 | SmallVector<Edge> entryEdges; |
203 | /// All edges from a block inside the cycle to a block outside the cycle. |
204 | SmallVector<Edge> exitEdges; |
205 | /// All edges from a block inside the cycle to an entry block. |
206 | SmallVector<Edge> backEdges; |
207 | }; |
208 | |
209 | /// Class used to orchestrate creation of so-called edge multiplexers. |
210 | /// This class creates a new basic block and routes all inputs edges |
211 | /// to this basic block before branching to their original target. |
212 | /// The purpose of this transformation is to create single-entry, |
213 | /// single-exit regions. |
214 | class EdgeMultiplexer { |
215 | public: |
216 | /// Creates a new edge multiplexer capable of redirecting all edges to one of |
217 | /// the `entryBlocks`. This creates the multiplexer basic block with |
218 | /// appropriate block arguments after the first entry block. `extraArgs` |
219 | /// contains the types of possible extra block arguments passed to the |
220 | /// multiplexer block that are added to the successor operands of every |
221 | /// outgoing edge. |
222 | /// |
223 | /// NOTE: This does not yet redirect edges to branch to the |
224 | /// multiplexer block nor code dispatching from the multiplexer code |
225 | /// to the original successors. |
226 | /// See `redirectEdge` and `createSwitch`. |
227 | static EdgeMultiplexer create(Location loc, ArrayRef<Block *> entryBlocks, |
228 | function_ref<Value(unsigned)> getSwitchValue, |
229 | function_ref<Value(Type)> getUndefValue, |
230 | TypeRange = {}) { |
231 | assert(!entryBlocks.empty() && "Require at least one entry block" ); |
232 | |
233 | auto *multiplexerBlock = new Block; |
234 | multiplexerBlock->insertAfter(block: entryBlocks.front()); |
235 | |
236 | // To implement the multiplexer block, we have to add the block arguments of |
237 | // every distinct successor block to the multiplexer block. When redirecting |
238 | // edges, block arguments designated for blocks that aren't branched to will |
239 | // be assigned the `getUndefValue`. The amount of block arguments and their |
240 | // offset is saved in the map for `redirectEdge` to transform the edges. |
241 | llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping; |
242 | for (Block *entryBlock : entryBlocks) { |
243 | auto [iter, inserted] = blockArgMapping.insert( |
244 | KV: {entryBlock, multiplexerBlock->getNumArguments()}); |
245 | if (inserted) |
246 | addBlockArgumentsFromOther(block: multiplexerBlock, other: entryBlock); |
247 | } |
248 | |
249 | // If we have more than one successor, we have to additionally add a |
250 | // discriminator value, denoting which successor to jump to. |
251 | // When redirecting edges, an appropriate value will be passed using |
252 | // `getSwitchValue`. |
253 | Value discriminator; |
254 | if (blockArgMapping.size() > 1) |
255 | discriminator = |
256 | multiplexerBlock->addArgument(type: getSwitchValue(0).getType(), loc); |
257 | |
258 | multiplexerBlock->addArguments( |
259 | types: extraArgs, locs: SmallVector<Location>(extraArgs.size(), loc)); |
260 | |
261 | return EdgeMultiplexer(multiplexerBlock, getSwitchValue, getUndefValue, |
262 | std::move(blockArgMapping), discriminator); |
263 | } |
264 | |
265 | /// Returns the created multiplexer block. |
266 | Block *getMultiplexerBlock() const { return multiplexerBlock; } |
267 | |
268 | /// Redirects `edge` to branch to the multiplexer block before continuing to |
269 | /// its original target. The edges successor must have originally been part |
270 | /// of the entry blocks array passed to the `create` function. `extraArgs` |
271 | /// must be used to pass along any additional values corresponding to |
272 | /// `extraArgs` in `create`. |
273 | void redirectEdge(Edge edge, ValueRange = {}) const { |
274 | const auto *result = blockArgMapping.find(Key: edge.getSuccessor()); |
275 | assert(result != blockArgMapping.end() && |
276 | "Edge was not originally passed to `create` method." ); |
277 | |
278 | MutableOperandRange successorOperands = edge.getMutableSuccessorOperands(); |
279 | |
280 | // Extra arguments are always appended at the end of the block arguments. |
281 | unsigned = |
282 | multiplexerBlock->getNumArguments() - extraArgs.size(); |
283 | // If a discriminator exists, it is right before the extra arguments. |
284 | std::optional<unsigned> discriminatorIndex = |
285 | discriminator ? extraArgsBeginIndex - 1 : std::optional<unsigned>{}; |
286 | |
287 | SmallVector<Value> newSuccOperands(multiplexerBlock->getNumArguments()); |
288 | for (BlockArgument argument : multiplexerBlock->getArguments()) { |
289 | unsigned index = argument.getArgNumber(); |
290 | if (index >= result->second && |
291 | index < result->second + edge.getSuccessor()->getNumArguments()) { |
292 | // Original block arguments to the entry block. |
293 | newSuccOperands[index] = |
294 | successorOperands[index - result->second].get(); |
295 | continue; |
296 | } |
297 | |
298 | // Discriminator value if it exists. |
299 | if (index == discriminatorIndex) { |
300 | newSuccOperands[index] = |
301 | getSwitchValue(result - blockArgMapping.begin()); |
302 | continue; |
303 | } |
304 | |
305 | // Followed by the extra arguments. |
306 | if (index >= extraArgsBeginIndex) { |
307 | newSuccOperands[index] = extraArgs[index - extraArgsBeginIndex]; |
308 | continue; |
309 | } |
310 | |
311 | // Otherwise undef values for any unused block arguments used by other |
312 | // entry blocks. |
313 | newSuccOperands[index] = getUndefValue(argument.getType()); |
314 | } |
315 | |
316 | edge.setSuccessor(multiplexerBlock); |
317 | successorOperands.assign(values: newSuccOperands); |
318 | } |
319 | |
320 | /// Creates a switch op using `builder` which dispatches to the original |
321 | /// successors of the edges passed to `create` minus the ones in `excluded`. |
322 | /// The builder's insertion point has to be in a block dominated by the |
323 | /// multiplexer block. All edges to the multiplexer block must have already |
324 | /// been redirected using `redirectEdge`. |
325 | void createSwitch( |
326 | Location loc, OpBuilder &builder, CFGToSCFInterface &interface, |
327 | const SmallPtrSetImpl<Block *> &excluded = SmallPtrSet<Block *, 1>{}) { |
328 | // We create the switch by creating a case for all entries and then |
329 | // splitting of the last entry as a default case. |
330 | |
331 | SmallVector<ValueRange> caseArguments; |
332 | SmallVector<unsigned> caseValues; |
333 | SmallVector<Block *> caseDestinations; |
334 | for (auto &&[index, pair] : llvm::enumerate(First&: blockArgMapping)) { |
335 | auto &&[succ, offset] = pair; |
336 | if (excluded.contains(Ptr: succ)) |
337 | continue; |
338 | |
339 | caseValues.push_back(Elt: index); |
340 | caseArguments.push_back(Elt: multiplexerBlock->getArguments().slice( |
341 | N: offset, M: succ->getNumArguments())); |
342 | caseDestinations.push_back(Elt: succ); |
343 | } |
344 | |
345 | // If we don't have a discriminator due to only having one entry we have to |
346 | // create a dummy flag for the switch. |
347 | Value realDiscriminator = discriminator; |
348 | if (!realDiscriminator || caseArguments.size() == 1) |
349 | realDiscriminator = getSwitchValue(0); |
350 | |
351 | caseValues.pop_back(); |
352 | Block *defaultDest = caseDestinations.pop_back_val(); |
353 | ValueRange defaultArgs = caseArguments.pop_back_val(); |
354 | |
355 | assert(!builder.getInsertionBlock()->hasNoPredecessors() && |
356 | "Edges need to be redirected prior to creating switch." ); |
357 | interface.createCFGSwitchOp(loc, builder, flag: realDiscriminator, caseValues, |
358 | caseDestinations, caseArguments, defaultDest, |
359 | defaultArgs); |
360 | } |
361 | |
362 | private: |
363 | /// Newly created multiplexer block. |
364 | Block *multiplexerBlock; |
365 | /// Callback used to create a constant suitable as flag for |
366 | /// the interfaces `createCFGSwitchOp`. |
367 | function_ref<Value(unsigned)> getSwitchValue; |
368 | /// Callback used to create undefined values of a given type. |
369 | function_ref<Value(Type)> getUndefValue; |
370 | |
371 | /// Mapping of the block arguments of an entry block to the corresponding |
372 | /// block arguments in the multiplexer block. Block arguments of an entry |
373 | /// block are simply appended ot the multiplexer block. This map simply |
374 | /// contains the offset to the range in the multiplexer block. |
375 | llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping; |
376 | /// Discriminator value used in the multiplexer block to dispatch to the |
377 | /// correct entry block. Null value if not required due to only having one |
378 | /// entry block. |
379 | Value discriminator; |
380 | |
381 | EdgeMultiplexer(Block *multiplexerBlock, |
382 | function_ref<Value(unsigned)> getSwitchValue, |
383 | function_ref<Value(Type)> getUndefValue, |
384 | llvm::SmallMapVector<Block *, unsigned, 4> &&entries, |
385 | Value dispatchFlag) |
386 | : multiplexerBlock(multiplexerBlock), getSwitchValue(getSwitchValue), |
387 | getUndefValue(getUndefValue), blockArgMapping(std::move(entries)), |
388 | discriminator(dispatchFlag) {} |
389 | }; |
390 | |
391 | /// Alternative implementation of DenseMapInfo<Operation*> using the operation |
392 | /// equivalence infrastructure to check whether two 'return-like' operations are |
393 | /// equivalent in the context of this transformation. This means that both |
394 | /// operations are of the same kind, have the same amount of operands and types |
395 | /// and the same attributes and properties. The operands themselves don't have |
396 | /// to be equivalent. |
397 | struct ReturnLikeOpEquivalence : public llvm::DenseMapInfo<Operation *> { |
398 | static unsigned getHashValue(const Operation *opC) { |
399 | return OperationEquivalence::computeHash( |
400 | op: const_cast<Operation *>(opC), |
401 | /*hashOperands=*/OperationEquivalence::ignoreHashValue, |
402 | /*hashResults=*/OperationEquivalence::ignoreHashValue, |
403 | flags: OperationEquivalence::IgnoreLocations); |
404 | } |
405 | |
406 | static bool isEqual(const Operation *lhs, const Operation *rhs) { |
407 | if (lhs == rhs) |
408 | return true; |
409 | if (lhs == getTombstoneKey() || lhs == getEmptyKey() || |
410 | rhs == getTombstoneKey() || rhs == getEmptyKey()) |
411 | return false; |
412 | return OperationEquivalence::isEquivalentTo( |
413 | lhs: const_cast<Operation *>(lhs), rhs: const_cast<Operation *>(rhs), |
414 | checkEquivalent: OperationEquivalence::ignoreValueEquivalence, markEquivalent: nullptr, |
415 | flags: OperationEquivalence::IgnoreLocations); |
416 | } |
417 | }; |
418 | |
419 | /// Utility-class for transforming a region to only have one single block for |
420 | /// every return-like operation. |
421 | class ReturnLikeExitCombiner { |
422 | public: |
423 | ReturnLikeExitCombiner(Region &topLevelRegion, CFGToSCFInterface &interface) |
424 | : topLevelRegion(topLevelRegion), interface(interface) {} |
425 | |
426 | /// Transforms `returnLikeOp` to a branch to the only block in the |
427 | /// region with an instance of `returnLikeOp`s kind. |
428 | void combineExit(Operation *returnLikeOp, |
429 | function_ref<Value(unsigned)> getSwitchValue) { |
430 | auto [iter, inserted] = |
431 | returnLikeToCombinedExit.insert(KV: {returnLikeOp, nullptr}); |
432 | if (!inserted && iter->first == returnLikeOp) |
433 | return; |
434 | |
435 | Block *exitBlock = iter->second; |
436 | if (inserted) { |
437 | exitBlock = new Block; |
438 | iter->second = exitBlock; |
439 | topLevelRegion.push_back(block: exitBlock); |
440 | exitBlock->addArguments( |
441 | types: returnLikeOp->getOperandTypes(), |
442 | locs: SmallVector<Location>(returnLikeOp->getNumOperands(), |
443 | returnLikeOp->getLoc())); |
444 | } |
445 | |
446 | auto builder = OpBuilder::atBlockTerminator(block: returnLikeOp->getBlock()); |
447 | interface.createSingleDestinationBranch(loc: returnLikeOp->getLoc(), builder, |
448 | dummyFlag: getSwitchValue(0), destination: exitBlock, |
449 | arguments: returnLikeOp->getOperands()); |
450 | |
451 | if (!inserted) { |
452 | returnLikeOp->erase(); |
453 | return; |
454 | } |
455 | |
456 | returnLikeOp->moveBefore(block: exitBlock, iterator: exitBlock->end()); |
457 | returnLikeOp->setOperands(exitBlock->getArguments()); |
458 | } |
459 | |
460 | private: |
461 | /// Mapping of return-like operation to block. All return-like operations |
462 | /// of the same kind with the same attributes, properties and types are seen |
463 | /// as equivalent. First occurrence seen is kept in the map. |
464 | llvm::SmallDenseMap<Operation *, Block *, 4, ReturnLikeOpEquivalence> |
465 | returnLikeToCombinedExit; |
466 | Region &topLevelRegion; |
467 | CFGToSCFInterface &interface; |
468 | }; |
469 | |
470 | } // namespace |
471 | |
472 | /// Returns a range of all edges from `block` to each of its successors. |
473 | static auto successorEdges(Block *block) { |
474 | return llvm::map_range(C: llvm::seq(Size: block->getNumSuccessors()), |
475 | F: [=](unsigned index) { return Edge(block, index); }); |
476 | } |
477 | |
478 | /// Calculates entry, exit and back edges of the given cycle. |
479 | static CycleEdges |
480 | calculateCycleEdges(const llvm::SmallSetVector<Block *, 4> &cycles) { |
481 | CycleEdges result; |
482 | SmallPtrSet<Block *, 8> entryBlocks; |
483 | |
484 | // First identify all exit and entry edges by checking whether any successors |
485 | // or predecessors are from outside the cycles. |
486 | for (Block *block : cycles) { |
487 | for (auto pred = block->pred_begin(); pred != block->pred_end(); pred++) { |
488 | if (cycles.contains(key: *pred)) |
489 | continue; |
490 | |
491 | result.entryEdges.emplace_back(Args: *pred, Args: pred.getSuccessorIndex()); |
492 | entryBlocks.insert(Ptr: block); |
493 | } |
494 | |
495 | for (auto &&[succIndex, succ] : llvm::enumerate(First: block->getSuccessors())) { |
496 | if (cycles.contains(key: succ)) |
497 | continue; |
498 | |
499 | result.exitEdges.emplace_back(Args&: block, Args&: succIndex); |
500 | } |
501 | } |
502 | |
503 | // With the entry blocks identified, find all the back edges. |
504 | for (Block *block : cycles) { |
505 | for (auto &&[succIndex, succ] : llvm::enumerate(First: block->getSuccessors())) { |
506 | if (!entryBlocks.contains(Ptr: succ)) |
507 | continue; |
508 | |
509 | result.backEdges.emplace_back(Args&: block, Args&: succIndex); |
510 | } |
511 | } |
512 | |
513 | return result; |
514 | } |
515 | |
516 | /// Creates a single entry block out of multiple entry edges using an edge |
517 | /// multiplexer and returns it. |
518 | static EdgeMultiplexer |
519 | createSingleEntryBlock(Location loc, ArrayRef<Edge> entryEdges, |
520 | function_ref<Value(unsigned)> getSwitchValue, |
521 | function_ref<Value(Type)> getUndefValue, |
522 | CFGToSCFInterface &interface) { |
523 | auto result = EdgeMultiplexer::create( |
524 | loc, entryBlocks: llvm::map_to_vector(C&: entryEdges, F: std::mem_fn(pm: &Edge::getSuccessor)), |
525 | getSwitchValue, getUndefValue); |
526 | |
527 | // Redirect the edges prior to creating the switch op. |
528 | // We guarantee that predecessors are up to date. |
529 | for (Edge edge : entryEdges) |
530 | result.redirectEdge(edge); |
531 | |
532 | auto builder = OpBuilder::atBlockBegin(block: result.getMultiplexerBlock()); |
533 | result.createSwitch(loc, builder, interface); |
534 | |
535 | return result; |
536 | } |
537 | |
538 | namespace { |
539 | /// Special loop properties of a structured loop. |
540 | /// A structured loop is a loop satisfying all of the following: |
541 | /// * Has at most one entry, one exit and one back edge. |
542 | /// * The back edge originates from the same block as the exit edge. |
543 | struct StructuredLoopProperties { |
544 | /// Block containing both the single exit edge and the single back edge. |
545 | Block *latch; |
546 | /// Loop condition of type equal to a value returned by `getSwitchValue`. |
547 | Value condition; |
548 | /// Exit block which is the only successor of the loop. |
549 | Block *exitBlock; |
550 | }; |
551 | } // namespace |
552 | |
553 | /// Transforms a loop into a structured loop with only a single back edge and |
554 | /// exiting edge, originating from the same block. |
555 | static FailureOr<StructuredLoopProperties> createSingleExitingLatch( |
556 | Location loc, ArrayRef<Edge> backEdges, ArrayRef<Edge> exitEdges, |
557 | function_ref<Value(unsigned)> getSwitchValue, |
558 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
559 | ReturnLikeExitCombiner &exitCombiner) { |
560 | assert(llvm::all_equal( |
561 | llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor))) && |
562 | "All repetition edges must lead to the single loop header" ); |
563 | |
564 | // First create the multiplexer block, which will be our latch, for all back |
565 | // edges and exit edges. We pass an additional argument to the multiplexer |
566 | // block which indicates whether the latch was reached from what was |
567 | // originally a back edge or an exit block. |
568 | // This is later used to branch using the new only back edge. |
569 | SmallVector<Block *> successors; |
570 | llvm::append_range( |
571 | C&: successors, R: llvm::map_range(C&: backEdges, F: std::mem_fn(pm: &Edge::getSuccessor))); |
572 | llvm::append_range( |
573 | C&: successors, R: llvm::map_range(C&: exitEdges, F: std::mem_fn(pm: &Edge::getSuccessor))); |
574 | auto multiplexer = |
575 | EdgeMultiplexer::create(loc, entryBlocks: successors, getSwitchValue, getUndefValue, |
576 | /*extraArgs=*/getSwitchValue(0).getType()); |
577 | |
578 | auto *latchBlock = multiplexer.getMultiplexerBlock(); |
579 | |
580 | // Create a separate exit block that comes right after the latch. |
581 | auto *exitBlock = new Block; |
582 | exitBlock->insertAfter(block: latchBlock); |
583 | |
584 | // Since this is a loop, all back edges point to the same loop header. |
585 | Block * = backEdges.front().getSuccessor(); |
586 | |
587 | // Redirect the edges prior to creating the switch op. |
588 | // We guarantee that predecessors are up to date. |
589 | |
590 | // Redirecting back edges with `shouldRepeat` as 1. |
591 | for (Edge backEdge : backEdges) |
592 | multiplexer.redirectEdge(edge: backEdge, /*extraArgs=*/getSwitchValue(1)); |
593 | |
594 | // Redirecting exits edges with `shouldRepeat` as 0. |
595 | for (Edge exitEdge : exitEdges) |
596 | multiplexer.redirectEdge(edge: exitEdge, /*extraArgs=*/getSwitchValue(0)); |
597 | |
598 | // Create the new only back edge to the loop header. Branch to the |
599 | // exit block otherwise. |
600 | Value shouldRepeat = latchBlock->getArguments().back(); |
601 | { |
602 | auto builder = OpBuilder::atBlockBegin(block: latchBlock); |
603 | interface.createConditionalBranch( |
604 | loc, builder, condition: shouldRepeat, trueDest: loopHeader, |
605 | trueArgs: latchBlock->getArguments().take_front(N: loopHeader->getNumArguments()), |
606 | /*falseDest=*/exitBlock, |
607 | /*falseArgs=*/{}); |
608 | } |
609 | |
610 | { |
611 | auto builder = OpBuilder::atBlockBegin(block: exitBlock); |
612 | if (!exitEdges.empty()) { |
613 | // Create the switch dispatching to what were originally the multiple exit |
614 | // blocks. The loop header has to explicitly be excluded in the below |
615 | // switch as we would otherwise be creating a new loop again. All back |
616 | // edges leading to the loop header have already been handled in the |
617 | // switch above. The remaining edges can only jump to blocks outside the |
618 | // loop. |
619 | |
620 | SmallPtrSet<Block *, 1> excluded = {loopHeader}; |
621 | multiplexer.createSwitch(loc, builder, interface, excluded); |
622 | } else { |
623 | // A loop without an exit edge is a statically known infinite loop. |
624 | // Since structured control flow ops are not terminator ops, the caller |
625 | // has to create a fitting return-like unreachable terminator operation. |
626 | FailureOr<Operation *> terminator = interface.createUnreachableTerminator( |
627 | loc, builder, region&: *latchBlock->getParent()); |
628 | if (failed(result: terminator)) |
629 | return failure(); |
630 | // Transform the just created transform operation in the case that an |
631 | // occurrence of it existed in input IR. |
632 | exitCombiner.combineExit(returnLikeOp: *terminator, getSwitchValue); |
633 | } |
634 | } |
635 | |
636 | return StructuredLoopProperties{.latch: latchBlock, /*condition=*/shouldRepeat, |
637 | .exitBlock: exitBlock}; |
638 | } |
639 | |
640 | /// Transforms a structured loop into a loop in reduce form. |
641 | /// |
642 | /// Reduce form is defined as a structured loop where: |
643 | /// (0) No values defined within the loop body are used outside the loop body. |
644 | /// (1) The block arguments and successor operands of the exit block are equal |
645 | /// to the block arguments of the loop header and the successor operands |
646 | /// of the back edge. |
647 | /// |
648 | /// This is required for many structured control flow ops as they tend |
649 | /// to not have separate "loop result arguments" and "loop iteration arguments" |
650 | /// at the end of the block. Rather, the "loop iteration arguments" from the |
651 | /// last iteration are the result of the loop. |
652 | /// |
653 | /// Note that the requirement of (0) is shared with LCSSA form in LLVM. However, |
654 | /// due to this being a structured loop instead of a general loop, we do not |
655 | /// require complicated dominance algorithms nor SSA updating making this |
656 | /// implementation easier than creating a generic LCSSA transformation pass. |
657 | static SmallVector<Value> |
658 | transformToReduceLoop(Block *, Block *exitBlock, |
659 | const llvm::SmallSetVector<Block *, 4> &loopBlocks, |
660 | function_ref<Value(Type)> getUndefValue, |
661 | DominanceInfo &dominanceInfo) { |
662 | Block *latch = exitBlock->getSinglePredecessor(); |
663 | assert(latch && |
664 | "Exit block must have only latch as predecessor at this point" ); |
665 | assert(exitBlock->getNumArguments() == 0 && |
666 | "Exit block mustn't have any block arguments at this point" ); |
667 | |
668 | unsigned = 0; |
669 | unsigned exitBlockIndex = 1; |
670 | if (latch->getSuccessor(i: loopHeaderIndex) != loopHeader) |
671 | std::swap(a&: loopHeaderIndex, b&: exitBlockIndex); |
672 | |
673 | assert(latch->getSuccessor(loopHeaderIndex) == loopHeader); |
674 | assert(latch->getSuccessor(exitBlockIndex) == exitBlock); |
675 | |
676 | MutableOperandRange exitBlockSuccessorOperands = |
677 | getMutableSuccessorOperands(block: latch, successorIndex: exitBlockIndex); |
678 | // Save the values as a vector, not a `MutableOperandRange` as the latter gets |
679 | // invalidated when mutating the operands through a different |
680 | // `MutableOperandRange` of the same operation. |
681 | SmallVector<Value> loopHeaderSuccessorOperands = |
682 | llvm::to_vector(Range: getSuccessorOperands(block: latch, successorIndex: loopHeaderIndex)); |
683 | |
684 | // Add all values used in the next iteration to the exit block. Replace |
685 | // any uses that are outside the loop with the newly created exit block. |
686 | for (Value arg : loopHeaderSuccessorOperands) { |
687 | BlockArgument exitArg = exitBlock->addArgument(type: arg.getType(), loc: arg.getLoc()); |
688 | exitBlockSuccessorOperands.append(values: arg); |
689 | arg.replaceUsesWithIf(newValue: exitArg, shouldReplace: [&](OpOperand &use) { |
690 | return !loopBlocks.contains(key: use.getOwner()->getBlock()); |
691 | }); |
692 | } |
693 | |
694 | // Loop below might add block arguments to the latch and loop header. |
695 | // Save the block arguments prior to the loop to not process these. |
696 | SmallVector<BlockArgument> latchBlockArgumentsPrior = |
697 | llvm::to_vector(Range: latch->getArguments()); |
698 | SmallVector<BlockArgument> = |
699 | llvm::to_vector(Range: loopHeader->getArguments()); |
700 | |
701 | // Go over all values defined within the loop body. If any of them are used |
702 | // outside the loop body, create a block argument on the exit block and loop |
703 | // header and replace the outside uses with the exit block argument. |
704 | // The loop header block argument is added to satisfy requirement (1) in the |
705 | // reduce form condition. |
706 | for (Block *loopBlock : loopBlocks) { |
707 | // Cache dominance queries for loopBlock. |
708 | // There are likely to be many duplicate queries as there can be many value |
709 | // definitions within a block. |
710 | llvm::SmallDenseMap<Block *, bool> dominanceCache; |
711 | // Returns true if `loopBlock` dominates `block`. |
712 | auto loopBlockDominates = [&](Block *block) { |
713 | auto [iter, inserted] = dominanceCache.insert(KV: {block, false}); |
714 | if (!inserted) |
715 | return iter->second; |
716 | iter->second = dominanceInfo.dominates(a: loopBlock, b: block); |
717 | return iter->second; |
718 | }; |
719 | |
720 | auto checkValue = [&](Value value) { |
721 | Value blockArgument; |
722 | for (OpOperand &use : llvm::make_early_inc_range(Range: value.getUses())) { |
723 | // Go through all the parent blocks and find the one part of the region |
724 | // of the loop. If the block is part of the loop, then the value does |
725 | // not escape the loop through this use. |
726 | Block *currBlock = use.getOwner()->getBlock(); |
727 | while (currBlock && currBlock->getParent() != loopHeader->getParent()) |
728 | currBlock = currBlock->getParentOp()->getBlock(); |
729 | if (loopBlocks.contains(key: currBlock)) |
730 | continue; |
731 | |
732 | // Block argument is only created the first time it is required. |
733 | if (!blockArgument) { |
734 | blockArgument = |
735 | exitBlock->addArgument(type: value.getType(), loc: value.getLoc()); |
736 | loopHeader->addArgument(type: value.getType(), loc: value.getLoc()); |
737 | |
738 | // `value` might be defined in a block that does not dominate `latch` |
739 | // but previously dominated an exit block with a use. |
740 | // In this case, add a block argument to the latch and go through all |
741 | // predecessors. If the value dominates the predecessor, pass the |
742 | // value as a successor operand, otherwise pass undef. |
743 | // The above is unnecessary if the value is a block argument of the |
744 | // latch or if `value` dominates all predecessors. |
745 | Value argument = value; |
746 | if (value.getParentBlock() != latch && |
747 | llvm::any_of(Range: latch->getPredecessors(), P: [&](Block *pred) { |
748 | return !loopBlockDominates(pred); |
749 | })) { |
750 | argument = latch->addArgument(type: value.getType(), loc: value.getLoc()); |
751 | for (auto iter = latch->pred_begin(); iter != latch->pred_end(); |
752 | ++iter) { |
753 | Value succOperand = value; |
754 | if (!loopBlockDominates(*iter)) |
755 | succOperand = getUndefValue(value.getType()); |
756 | |
757 | getMutableSuccessorOperands(block: *iter, successorIndex: iter.getSuccessorIndex()) |
758 | .append(values: succOperand); |
759 | } |
760 | } |
761 | |
762 | loopHeaderSuccessorOperands.push_back(Elt: argument); |
763 | for (Edge edge : successorEdges(block: latch)) |
764 | edge.getMutableSuccessorOperands().append(values: argument); |
765 | } |
766 | |
767 | use.set(blockArgument); |
768 | } |
769 | }; |
770 | |
771 | if (loopBlock == latch) |
772 | llvm::for_each(Range&: latchBlockArgumentsPrior, F: checkValue); |
773 | else if (loopBlock == loopHeader) |
774 | llvm::for_each(Range&: loopHeaderArgumentsPrior, F: checkValue); |
775 | else |
776 | llvm::for_each(Range: loopBlock->getArguments(), F: checkValue); |
777 | |
778 | for (Operation &op : *loopBlock) |
779 | llvm::for_each(Range: op.getResults(), F: checkValue); |
780 | } |
781 | |
782 | // New block arguments may have been added to the loop header. |
783 | // Adjust the entry edges to pass undef values to these. |
784 | for (auto iter = loopHeader->pred_begin(); iter != loopHeader->pred_end(); |
785 | ++iter) { |
786 | // Latch successor arguments have already been handled. |
787 | if (*iter == latch) |
788 | continue; |
789 | |
790 | MutableOperandRange succOps = |
791 | getMutableSuccessorOperands(block: *iter, successorIndex: iter.getSuccessorIndex()); |
792 | succOps.append(values: llvm::map_to_vector( |
793 | C: loopHeader->getArguments().drop_front(N: succOps.size()), |
794 | F: [&](BlockArgument arg) { return getUndefValue(arg.getType()); })); |
795 | } |
796 | |
797 | return loopHeaderSuccessorOperands; |
798 | } |
799 | |
800 | /// Transforms all outer-most cycles in the region with the region entry |
801 | /// `regionEntry` into structured loops. Returns the entry blocks of any newly |
802 | /// created regions potentially requiring further transformations. |
803 | static FailureOr<SmallVector<Block *>> transformCyclesToSCFLoops( |
804 | Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue, |
805 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
806 | DominanceInfo &dominanceInfo, ReturnLikeExitCombiner &exitCombiner) { |
807 | SmallVector<Block *> newSubRegions; |
808 | auto scc = llvm::scc_begin(G: regionEntry); |
809 | while (!scc.isAtEnd()) { |
810 | if (!scc.hasCycle()) { |
811 | ++scc; |
812 | continue; |
813 | } |
814 | |
815 | // Save the set and increment the SCC iterator early to avoid our |
816 | // modifications breaking the SCC iterator. |
817 | llvm::SmallSetVector<Block *, 4> cycleBlockSet(scc->begin(), scc->end()); |
818 | ++scc; |
819 | |
820 | CycleEdges edges = calculateCycleEdges(cycles: cycleBlockSet); |
821 | Block * = edges.entryEdges.front().getSuccessor(); |
822 | // First turn the cycle into a loop by creating a single entry block if |
823 | // needed. |
824 | if (edges.entryEdges.size() > 1) { |
825 | SmallVector<Edge> edgesToEntryBlocks; |
826 | llvm::append_range(C&: edgesToEntryBlocks, R&: edges.entryEdges); |
827 | llvm::append_range(C&: edgesToEntryBlocks, R&: edges.backEdges); |
828 | |
829 | EdgeMultiplexer multiplexer = createSingleEntryBlock( |
830 | loc: loopHeader->getTerminator()->getLoc(), entryEdges: edgesToEntryBlocks, |
831 | getSwitchValue, getUndefValue, interface); |
832 | |
833 | loopHeader = multiplexer.getMultiplexerBlock(); |
834 | } |
835 | cycleBlockSet.insert(X: loopHeader); |
836 | |
837 | // Then turn it into a structured loop by creating a single latch. |
838 | FailureOr<StructuredLoopProperties> loopProperties = |
839 | createSingleExitingLatch( |
840 | loc: edges.backEdges.front().getFromBlock()->getTerminator()->getLoc(), |
841 | backEdges: edges.backEdges, exitEdges: edges.exitEdges, getSwitchValue, getUndefValue, |
842 | interface, exitCombiner); |
843 | if (failed(result: loopProperties)) |
844 | return failure(); |
845 | |
846 | Block *latchBlock = loopProperties->latch; |
847 | Block *exitBlock = loopProperties->exitBlock; |
848 | cycleBlockSet.insert(X: latchBlock); |
849 | cycleBlockSet.insert(X: loopHeader); |
850 | |
851 | // Finally, turn it into reduce form. |
852 | SmallVector<Value> iterationValues = transformToReduceLoop( |
853 | loopHeader, exitBlock, loopBlocks: cycleBlockSet, getUndefValue, dominanceInfo); |
854 | |
855 | // Create a block acting as replacement for the loop header and insert |
856 | // the structured loop into it. |
857 | auto *newLoopParentBlock = new Block; |
858 | newLoopParentBlock->insertBefore(block: loopHeader); |
859 | addBlockArgumentsFromOther(block: newLoopParentBlock, other: loopHeader); |
860 | |
861 | Region::BlockListType &blocks = regionEntry->getParent()->getBlocks(); |
862 | Region loopBody; |
863 | // Make sure the loop header is the entry block. |
864 | loopBody.push_back(block: blocks.remove(IT: loopHeader)); |
865 | for (Block *block : cycleBlockSet) |
866 | if (block != latchBlock && block != loopHeader) |
867 | loopBody.push_back(block: blocks.remove(IT: block)); |
868 | // And the latch is the last block. |
869 | loopBody.push_back(block: blocks.remove(IT: latchBlock)); |
870 | |
871 | Operation *oldTerminator = latchBlock->getTerminator(); |
872 | oldTerminator->remove(); |
873 | |
874 | auto builder = OpBuilder::atBlockBegin(block: newLoopParentBlock); |
875 | FailureOr<Operation *> structuredLoopOp = |
876 | interface.createStructuredDoWhileLoopOp( |
877 | builder, replacedOp: oldTerminator, loopValuesInit: newLoopParentBlock->getArguments(), |
878 | condition: loopProperties->condition, loopValuesNextIter: iterationValues, loopBody: std::move(loopBody)); |
879 | if (failed(result: structuredLoopOp)) |
880 | return failure(); |
881 | oldTerminator->erase(); |
882 | |
883 | newSubRegions.push_back(Elt: loopHeader); |
884 | |
885 | for (auto &&[oldValue, newValue] : llvm::zip( |
886 | t: exitBlock->getArguments(), u: (*structuredLoopOp)->getResults())) |
887 | oldValue.replaceAllUsesWith(newValue); |
888 | |
889 | loopHeader->replaceAllUsesWith(newValue&: newLoopParentBlock); |
890 | // Merge the exit block right after the loop operation. |
891 | newLoopParentBlock->getOperations().splice(where: newLoopParentBlock->end(), |
892 | L2&: exitBlock->getOperations()); |
893 | exitBlock->erase(); |
894 | } |
895 | return newSubRegions; |
896 | } |
897 | |
898 | /// Makes sure the branch region only has a single exit. This is required by the |
899 | /// recursive part of the algorithm, as it expects the CFG to be single-entry |
900 | /// and single-exit. This is done by simply creating an empty block if there |
901 | /// is more than one block with an edge to the continuation block. All blocks |
902 | /// with edges to the continuation are then redirected to this block. A region |
903 | /// terminator is later placed into the block. |
904 | static void createSingleExitBranchRegion( |
905 | ArrayRef<Block *> branchRegion, Block *continuation, |
906 | SmallVectorImpl<std::pair<Block *, SmallVector<Value>>> &createdEmptyBlocks, |
907 | Region &conditionalRegion) { |
908 | Block *singleExitBlock = nullptr; |
909 | std::optional<Edge> previousEdgeToContinuation; |
910 | Region::BlockListType &parentBlockList = |
911 | branchRegion.front()->getParent()->getBlocks(); |
912 | for (Block *block : branchRegion) { |
913 | for (Edge edge : successorEdges(block)) { |
914 | if (edge.getSuccessor() != continuation) |
915 | continue; |
916 | |
917 | if (!previousEdgeToContinuation) { |
918 | previousEdgeToContinuation = edge; |
919 | continue; |
920 | } |
921 | |
922 | // If this is not the first edge to the continuation we create the |
923 | // single exit block and redirect the edges. |
924 | if (!singleExitBlock) { |
925 | singleExitBlock = new Block; |
926 | addBlockArgumentsFromOther(block: singleExitBlock, other: continuation); |
927 | previousEdgeToContinuation->setSuccessor(singleExitBlock); |
928 | createdEmptyBlocks.emplace_back(Args&: singleExitBlock, |
929 | Args: singleExitBlock->getArguments()); |
930 | } |
931 | |
932 | edge.setSuccessor(singleExitBlock); |
933 | } |
934 | |
935 | conditionalRegion.push_back(block: parentBlockList.remove(IT: block)); |
936 | } |
937 | |
938 | if (singleExitBlock) |
939 | conditionalRegion.push_back(block: singleExitBlock); |
940 | } |
941 | |
942 | /// Returns true if this block is an exit block of the region. |
943 | static bool isRegionExitBlock(Block *block) { |
944 | return block->getNumSuccessors() == 0; |
945 | } |
946 | |
947 | /// Transforms the first occurrence of conditional control flow in `regionEntry` |
948 | /// into conditionally executed regions. Returns the entry block of the created |
949 | /// regions and the region after the conditional control flow. |
950 | static FailureOr<SmallVector<Block *>> transformToStructuredCFBranches( |
951 | Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue, |
952 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
953 | DominanceInfo &dominanceInfo) { |
954 | // Trivial region. |
955 | if (regionEntry->getNumSuccessors() == 0) |
956 | return SmallVector<Block *>{}; |
957 | |
958 | if (regionEntry->getNumSuccessors() == 1) { |
959 | // Single successor we can just splice together. |
960 | Block *successor = regionEntry->getSuccessor(i: 0); |
961 | for (auto &&[oldValue, newValue] : llvm::zip( |
962 | t: successor->getArguments(), u: getSuccessorOperands(block: regionEntry, successorIndex: 0))) |
963 | oldValue.replaceAllUsesWith(newValue); |
964 | regionEntry->getTerminator()->erase(); |
965 | |
966 | regionEntry->getOperations().splice(where: regionEntry->end(), |
967 | L2&: successor->getOperations()); |
968 | successor->erase(); |
969 | return SmallVector<Block *>{regionEntry}; |
970 | } |
971 | |
972 | // Split the CFG into "#numSuccessor + 1" regions. |
973 | // For every edge to a successor, the blocks it solely dominates are |
974 | // determined and become the region following that edge. |
975 | // The last region is the continuation that follows the branch regions. |
976 | SmallPtrSet<Block *, 8> notContinuation; |
977 | notContinuation.insert(Ptr: regionEntry); |
978 | SmallVector<SmallVector<Block *>> successorBranchRegions( |
979 | regionEntry->getNumSuccessors()); |
980 | for (auto &&[blockList, succ] : |
981 | llvm::zip(t&: successorBranchRegions, u: regionEntry->getSuccessors())) { |
982 | // If the region entry is not the only predecessor, then the edge does not |
983 | // dominate the block it leads to. |
984 | if (succ->getSinglePredecessor() != regionEntry) |
985 | continue; |
986 | |
987 | // Otherwise get all blocks it dominates in DFS/pre-order. |
988 | DominanceInfoNode *node = dominanceInfo.getNode(a: succ); |
989 | for (DominanceInfoNode *curr : llvm::depth_first(G: node)) { |
990 | blockList.push_back(Elt: curr->getBlock()); |
991 | notContinuation.insert(Ptr: curr->getBlock()); |
992 | } |
993 | } |
994 | |
995 | // Finds all relevant edges and checks the shape of the control flow graph at |
996 | // this point. |
997 | // Branch regions may either: |
998 | // * Be post-dominated by the continuation |
999 | // * Be post-dominated by a return-like op |
1000 | // * Dominate a return-like op and have an edge to the continuation. |
1001 | // |
1002 | // The control flow graph may then be one of three cases: |
1003 | // 1) All branch regions are post-dominated by the continuation. This is the |
1004 | // usual case. If there are multiple entry blocks into the continuation a |
1005 | // single entry block has to be created. A structured control flow op |
1006 | // can then be created from the branch regions. |
1007 | // |
1008 | // 2) No branch region has an edge to a continuation: |
1009 | // +-----+ |
1010 | // +-----+ bb0 +----+ |
1011 | // v +-----+ v |
1012 | // Region 1 +-+--+ ... +-+--+ Region n |
1013 | // |ret1| |ret2| |
1014 | // +----+ +----+ |
1015 | // |
1016 | // This can only occur if every region ends with a different kind of |
1017 | // return-like op. In that case the control flow operation must stay as we are |
1018 | // unable to create a single exit-block. We can nevertheless process all its |
1019 | // successors as they single-entry, single-exit regions. |
1020 | // |
1021 | // 3) Only some branch regions are post-dominated by the continuation. |
1022 | // The other branch regions may either be post-dominated by a return-like op |
1023 | // or lead to either the continuation or return-like op. |
1024 | // In this case we also create a single entry block like in 1) that also |
1025 | // includes all edges to the return-like op: |
1026 | // +-----+ |
1027 | // +-----+ bb0 +----+ |
1028 | // v +-----+ v |
1029 | // Region 1 +-+-+ ... +-+-+ Region n |
1030 | // +---+ +---+ |
1031 | // +---+ |... ... |
1032 | // |ret|<-+ | | |
1033 | // +---+ | +---+ | |
1034 | // +---->++ ++<---+ |
1035 | // | | |
1036 | // ++ ++ Region T |
1037 | // +---+ |
1038 | // This transforms to: |
1039 | // +-----+ |
1040 | // +-----+ bb0 +----+ |
1041 | // v +-----+ v |
1042 | // Region 1 +-+-+ ... +-+-+ Region n |
1043 | // +---+ +---+ |
1044 | // ... +-----+ ... |
1045 | // +---->+ bbM +<---+ |
1046 | // +-----+ |
1047 | // +-----+ | |
1048 | // | v |
1049 | // +---+ | +---+ |
1050 | // |ret+<---+ ++ ++ |
1051 | // +---+ | | |
1052 | // ++ ++ Region T |
1053 | // +---+ |
1054 | // |
1055 | // bb0 to bbM is now a single-entry, single-exit region that applies to case |
1056 | // 1). The control flow op at the end of bbM will trigger case 2. |
1057 | SmallVector<Edge> continuationEdges; |
1058 | bool continuationPostDominatesAllRegions = true; |
1059 | bool noSuccessorHasContinuationEdge = true; |
1060 | for (auto &&[entryEdge, branchRegion] : |
1061 | llvm::zip(t: successorEdges(block: regionEntry), u&: successorBranchRegions)) { |
1062 | |
1063 | // If the branch region is empty then the branch target itself is part of |
1064 | // the continuation. |
1065 | if (branchRegion.empty()) { |
1066 | continuationEdges.push_back(Elt: entryEdge); |
1067 | noSuccessorHasContinuationEdge = false; |
1068 | continue; |
1069 | } |
1070 | |
1071 | for (Block *block : branchRegion) { |
1072 | if (isRegionExitBlock(block)) { |
1073 | // If a return-like op is part of the branch region then the |
1074 | // continuation no longer post-dominates the branch region. |
1075 | // Add all its incoming edges to edge list to create the single-exit |
1076 | // block for all branch regions. |
1077 | continuationPostDominatesAllRegions = false; |
1078 | for (auto iter = block->pred_begin(); iter != block->pred_end(); |
1079 | ++iter) { |
1080 | continuationEdges.emplace_back(Args: *iter, Args: iter.getSuccessorIndex()); |
1081 | } |
1082 | continue; |
1083 | } |
1084 | |
1085 | for (Edge edge : successorEdges(block)) { |
1086 | if (notContinuation.contains(Ptr: edge.getSuccessor())) |
1087 | continue; |
1088 | |
1089 | continuationEdges.push_back(Elt: edge); |
1090 | noSuccessorHasContinuationEdge = false; |
1091 | } |
1092 | } |
1093 | } |
1094 | |
1095 | // case 2) Keep the control flow op but process its successors further. |
1096 | if (noSuccessorHasContinuationEdge) |
1097 | return llvm::to_vector(Range: regionEntry->getSuccessors()); |
1098 | |
1099 | Block *continuation = llvm::find_singleton<Block>( |
1100 | Range&: continuationEdges, P: [](Edge edge, bool) { return edge.getSuccessor(); }, |
1101 | /*AllowRepeats=*/true); |
1102 | |
1103 | // In case 3) or if not all continuation edges have the same entry block, |
1104 | // create a single entry block as continuation for all branch regions. |
1105 | if (!continuation || !continuationPostDominatesAllRegions) { |
1106 | EdgeMultiplexer multiplexer = createSingleEntryBlock( |
1107 | loc: continuationEdges.front().getFromBlock()->getTerminator()->getLoc(), |
1108 | entryEdges: continuationEdges, getSwitchValue, getUndefValue, interface); |
1109 | continuation = multiplexer.getMultiplexerBlock(); |
1110 | } |
1111 | |
1112 | // Trigger reprocess of case 3) after creating the single entry block. |
1113 | if (!continuationPostDominatesAllRegions) { |
1114 | // Unlike in the general case, we are explicitly revisiting the same region |
1115 | // entry again after having changed its control flow edges and dominance. |
1116 | // We have to therefore explicitly invalidate the dominance tree. |
1117 | dominanceInfo.invalidate(region: regionEntry->getParent()); |
1118 | return SmallVector<Block *>{regionEntry}; |
1119 | } |
1120 | |
1121 | SmallVector<Block *> newSubRegions; |
1122 | |
1123 | // Empty blocks with the values they return to the parent op. |
1124 | SmallVector<std::pair<Block *, SmallVector<Value>>> createdEmptyBlocks; |
1125 | |
1126 | // Create the branch regions. |
1127 | std::vector<Region> conditionalRegions(successorBranchRegions.size()); |
1128 | for (auto &&[branchRegion, entryEdge, conditionalRegion] : |
1129 | llvm::zip(t&: successorBranchRegions, u: successorEdges(block: regionEntry), |
1130 | args&: conditionalRegions)) { |
1131 | if (branchRegion.empty()) { |
1132 | // If no block is part of the branch region, we create a dummy block to |
1133 | // place the region terminator into. |
1134 | createdEmptyBlocks.emplace_back( |
1135 | Args: new Block, Args: llvm::to_vector(Range: entryEdge.getSuccessorOperands())); |
1136 | conditionalRegion.push_back(block: createdEmptyBlocks.back().first); |
1137 | continue; |
1138 | } |
1139 | |
1140 | createSingleExitBranchRegion(branchRegion, continuation, createdEmptyBlocks, |
1141 | conditionalRegion); |
1142 | |
1143 | // The entries of the branch regions may only have redundant block arguments |
1144 | // since the edge to the branch region is always dominating. |
1145 | Block *subRegionEntryBlock = &conditionalRegion.front(); |
1146 | for (auto &&[oldValue, newValue] : |
1147 | llvm::zip(t: subRegionEntryBlock->getArguments(), |
1148 | u: entryEdge.getSuccessorOperands())) |
1149 | oldValue.replaceAllUsesWith(newValue); |
1150 | |
1151 | subRegionEntryBlock->eraseArguments(start: 0, |
1152 | num: subRegionEntryBlock->getNumArguments()); |
1153 | newSubRegions.push_back(Elt: subRegionEntryBlock); |
1154 | } |
1155 | |
1156 | Operation *structuredCondOp; |
1157 | { |
1158 | auto opBuilder = OpBuilder::atBlockTerminator(block: regionEntry); |
1159 | FailureOr<Operation *> result = interface.createStructuredBranchRegionOp( |
1160 | builder&: opBuilder, controlFlowCondOp: regionEntry->getTerminator(), |
1161 | resultTypes: continuation->getArgumentTypes(), regions: conditionalRegions); |
1162 | if (failed(result)) |
1163 | return failure(); |
1164 | structuredCondOp = *result; |
1165 | regionEntry->getTerminator()->erase(); |
1166 | } |
1167 | |
1168 | for (auto &&[block, valueRange] : createdEmptyBlocks) { |
1169 | auto builder = OpBuilder::atBlockEnd(block); |
1170 | LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( |
1171 | loc: structuredCondOp->getLoc(), builder, branchRegionOp: structuredCondOp, replacedControlFlowOp: nullptr, |
1172 | results: valueRange); |
1173 | if (failed(result)) |
1174 | return failure(); |
1175 | } |
1176 | |
1177 | // Any leftover users of the continuation must be from unconditional branches |
1178 | // in a branch region. There can only be at most one per branch region as |
1179 | // all branch regions have been made single-entry single-exit above. |
1180 | // Replace them with the region terminator. |
1181 | for (Operation *user : llvm::make_early_inc_range(Range: continuation->getUsers())) { |
1182 | assert(user->getNumSuccessors() == 1); |
1183 | auto builder = OpBuilder::atBlockTerminator(block: user->getBlock()); |
1184 | LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( |
1185 | loc: user->getLoc(), builder, branchRegionOp: structuredCondOp, replacedControlFlowOp: user, |
1186 | results: getMutableSuccessorOperands(block: user->getBlock(), successorIndex: 0).getAsOperandRange()); |
1187 | if (failed(result)) |
1188 | return failure(); |
1189 | user->erase(); |
1190 | } |
1191 | |
1192 | for (auto &&[oldValue, newValue] : |
1193 | llvm::zip(t: continuation->getArguments(), u: structuredCondOp->getResults())) |
1194 | oldValue.replaceAllUsesWith(newValue); |
1195 | |
1196 | // Splice together the continuations operations with the region entry. |
1197 | regionEntry->getOperations().splice(where: regionEntry->end(), |
1198 | L2&: continuation->getOperations()); |
1199 | |
1200 | continuation->erase(); |
1201 | |
1202 | // After splicing the continuation, the region has to be reprocessed as it has |
1203 | // new successors. |
1204 | newSubRegions.push_back(Elt: regionEntry); |
1205 | |
1206 | return newSubRegions; |
1207 | } |
1208 | |
1209 | /// Transforms the region to only have a single block for every kind of |
1210 | /// return-like operation that all previous occurrences of the return-like op |
1211 | /// branch to. If the region only contains a single kind of return-like |
1212 | /// operation, it creates a single-entry and single-exit region. |
1213 | static ReturnLikeExitCombiner createSingleExitBlocksForReturnLike( |
1214 | Region ®ion, function_ref<Value(unsigned)> getSwitchValue, |
1215 | CFGToSCFInterface &interface) { |
1216 | ReturnLikeExitCombiner exitCombiner(region, interface); |
1217 | |
1218 | for (Block &block : region.getBlocks()) { |
1219 | if (block.getNumSuccessors() != 0) |
1220 | continue; |
1221 | exitCombiner.combineExit(returnLikeOp: block.getTerminator(), getSwitchValue); |
1222 | } |
1223 | |
1224 | return exitCombiner; |
1225 | } |
1226 | |
1227 | /// Checks all preconditions of the transformation prior to any transformations. |
1228 | /// Returns failure if any precondition is violated. |
1229 | static LogicalResult checkTransformationPreconditions(Region ®ion) { |
1230 | for (Block &block : region.getBlocks()) |
1231 | if (block.hasNoPredecessors() && !block.isEntryBlock()) |
1232 | return block.front().emitOpError( |
1233 | message: "transformation does not support unreachable blocks" ); |
1234 | |
1235 | WalkResult result = region.walk(callback: [](Operation *operation) { |
1236 | if (operation->getNumSuccessors() == 0) |
1237 | return WalkResult::advance(); |
1238 | |
1239 | // This transformation requires all ops with successors to implement the |
1240 | // branch op interface. It is impossible to adjust their block arguments |
1241 | // otherwise. |
1242 | auto branchOpInterface = dyn_cast<BranchOpInterface>(operation); |
1243 | if (!branchOpInterface) { |
1244 | operation->emitOpError(message: "transformation does not support terminators with " |
1245 | "successors not implementing BranchOpInterface" ); |
1246 | return WalkResult::interrupt(); |
1247 | } |
1248 | // Branch operations must have no side effects. Replacing them would not be |
1249 | // valid otherwise. |
1250 | if (!isMemoryEffectFree(branchOpInterface)) { |
1251 | branchOpInterface->emitOpError( |
1252 | "transformation does not support terminators with side effects" ); |
1253 | return WalkResult::interrupt(); |
1254 | } |
1255 | |
1256 | for (unsigned index : llvm::seq(Size: operation->getNumSuccessors())) { |
1257 | SuccessorOperands succOps = branchOpInterface.getSuccessorOperands(index); |
1258 | |
1259 | // We cannot support operations with operation-produced successor operands |
1260 | // as it is currently not possible to pass them to any block arguments |
1261 | // other than the first. This breaks creating multiplexer blocks and would |
1262 | // likely need special handling elsewhere too. |
1263 | if (succOps.getProducedOperandCount() == 0) |
1264 | continue; |
1265 | |
1266 | branchOpInterface->emitOpError("transformation does not support " |
1267 | "operations with operation-produced " |
1268 | "successor operands" ); |
1269 | return WalkResult::interrupt(); |
1270 | } |
1271 | return WalkResult::advance(); |
1272 | }); |
1273 | return failure(isFailure: result.wasInterrupted()); |
1274 | } |
1275 | |
1276 | FailureOr<bool> mlir::transformCFGToSCF(Region ®ion, |
1277 | CFGToSCFInterface &interface, |
1278 | DominanceInfo &dominanceInfo) { |
1279 | if (region.empty() || region.hasOneBlock()) |
1280 | return false; |
1281 | |
1282 | if (failed(result: checkTransformationPreconditions(region))) |
1283 | return failure(); |
1284 | |
1285 | DenseMap<Type, Value> typedUndefCache; |
1286 | auto getUndefValue = [&](Type type) { |
1287 | auto [iter, inserted] = typedUndefCache.insert(KV: {type, nullptr}); |
1288 | if (!inserted) |
1289 | return iter->second; |
1290 | |
1291 | auto constantBuilder = OpBuilder::atBlockBegin(block: ®ion.front()); |
1292 | |
1293 | iter->second = |
1294 | interface.getUndefValue(loc: region.getLoc(), builder&: constantBuilder, type); |
1295 | return iter->second; |
1296 | }; |
1297 | |
1298 | // The transformation only creates all values in the range of 0 to |
1299 | // max(#numSuccessors). Therefore using a vector instead of a map. |
1300 | SmallVector<Value> switchValueCache; |
1301 | auto getSwitchValue = [&](unsigned value) { |
1302 | if (value < switchValueCache.size()) |
1303 | if (switchValueCache[value]) |
1304 | return switchValueCache[value]; |
1305 | |
1306 | auto constantBuilder = OpBuilder::atBlockBegin(block: ®ion.front()); |
1307 | |
1308 | switchValueCache.resize( |
1309 | N: std::max<size_t>(a: switchValueCache.size(), b: value + 1)); |
1310 | |
1311 | switchValueCache[value] = |
1312 | interface.getCFGSwitchValue(loc: region.getLoc(), builder&: constantBuilder, value); |
1313 | return switchValueCache[value]; |
1314 | }; |
1315 | |
1316 | ReturnLikeExitCombiner exitCombiner = |
1317 | createSingleExitBlocksForReturnLike(region, getSwitchValue, interface); |
1318 | |
1319 | // Invalidate any dominance tree on the region as the exit combiner has |
1320 | // added new blocks and edges. |
1321 | dominanceInfo.invalidate(region: ®ion); |
1322 | |
1323 | SmallVector<Block *> workList = {®ion.front()}; |
1324 | while (!workList.empty()) { |
1325 | Block *current = workList.pop_back_val(); |
1326 | |
1327 | // Turn all top-level cycles in the CFG to structured control flow first. |
1328 | // After this transformation, the remaining CFG ops form a DAG. |
1329 | FailureOr<SmallVector<Block *>> newRegions = |
1330 | transformCyclesToSCFLoops(regionEntry: current, getSwitchValue, getUndefValue, |
1331 | interface, dominanceInfo, exitCombiner); |
1332 | if (failed(result: newRegions)) |
1333 | return failure(); |
1334 | |
1335 | // Add the newly created subregions to the worklist. These are the |
1336 | // bodies of the loops. |
1337 | llvm::append_range(C&: workList, R&: *newRegions); |
1338 | // Invalidate the dominance tree as blocks have been moved, created and |
1339 | // added during the cycle to structured loop transformation. |
1340 | if (!newRegions->empty()) |
1341 | dominanceInfo.invalidate(region: current->getParent()); |
1342 | |
1343 | newRegions = transformToStructuredCFBranches( |
1344 | regionEntry: current, getSwitchValue, getUndefValue, interface, dominanceInfo); |
1345 | if (failed(result: newRegions)) |
1346 | return failure(); |
1347 | // Invalidating the dominance tree is generally not required by the |
1348 | // transformation above as the new region entries correspond to unaffected |
1349 | // subtrees in the dominator tree. Only its parent nodes have changed but |
1350 | // won't be visited again. |
1351 | llvm::append_range(C&: workList, R&: *newRegions); |
1352 | } |
1353 | |
1354 | return true; |
1355 | } |
1356 | |