| 1 | //===- CFGToSCF.h - Control Flow Graph to Structured Control Flow *- C++ -*===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This code is an implementation of: |
| 10 | // Helge Bahmann, Nico Reissmann, Magnus Jahre, and Jan Christian Meyer. 2015. |
| 11 | // Perfect Reconstructability of Control Flow from Demand Dependence Graphs. ACM |
| 12 | // Trans. Archit. Code Optim. 11, 4, Article 66 (January 2015), 25 pages. |
| 13 | // https://doi.org/10.1145/2693261 |
| 14 | // |
| 15 | // It defines an algorithm to translate any control flow graph with a single |
| 16 | // entry and single exit block into structured control flow operations |
| 17 | // consisting of regions of do-while loops and operations conditionally |
| 18 | // dispatching to one out of multiple regions before continuing after the |
| 19 | // operation. This includes control flow graphs containing irreducible |
| 20 | // control flow. |
| 21 | // |
| 22 | // The implementation here additionally supports the transformation on |
| 23 | // regions with multiple exit blocks. This is implemented by first |
| 24 | // transforming all occurrences of return-like operations to branch to a |
| 25 | // single exit block containing an instance of that return-like operation. |
| 26 | // If there are multiple kinds of return-like operations, multiple exit |
| 27 | // blocks are created. In that case the transformation leaves behind a |
| 28 | // conditional control flow graph operation that dispatches to the given regions |
| 29 | // terminating with different kinds of return-like operations each. |
| 30 | // |
| 31 | // If the function only contains a single kind of return-like operations, |
| 32 | // it is guaranteed that all control flow graph ops will be lifted to structured |
| 33 | // control flow, and that no more control flow graph ops remain after the |
| 34 | // operation. |
| 35 | // |
| 36 | // The algorithm to lift CFGs consists of two transformations applied after each |
| 37 | // other on any single-entry, single-exit region: |
| 38 | // 1) Lifting cycles to structured control flow loops |
| 39 | // 2) Lifting conditional branches to structured control flow branches |
| 40 | // These are then applied recursively on any new single-entry single-exit |
| 41 | // regions created by the transformation until no more CFG operations remain. |
| 42 | // |
| 43 | // The first part of cycle lifting is to detect any cycles in the CFG. |
| 44 | // This is done using an algorithm for iterating over SCCs. Every SCC |
| 45 | // representing a cycle is then transformed into a structured loop with a single |
| 46 | // entry block and a single latch containing the only back edge to the entry |
| 47 | // block and the only edge to an exit block outside the loop. Rerouting control |
| 48 | // flow to create single entry and exit blocks is achieved via a multiplexer |
| 49 | // construct that can be visualized as follows: |
| 50 | // +-----+ +-----+ +-----+ |
| 51 | // | bb0 | | bb1 |...| bbN | |
| 52 | // +--+--+ +--+--+ +-+---+ |
| 53 | // | | | |
| 54 | // | v | |
| 55 | // | +------+ | |
| 56 | // | ++ ++<----+ |
| 57 | // | | Region | |
| 58 | // +>| |<----+ |
| 59 | // ++ ++ | |
| 60 | // +------+------+ |
| 61 | // |
| 62 | // The above transforms to: |
| 63 | // +-----+ +-----+ +-----+ |
| 64 | // | bb0 | | bb1 |...| bbN | |
| 65 | // +-----+ +--|--+ ++----+ |
| 66 | // | v | |
| 67 | // +->+-----+<---+ |
| 68 | // | bbM |<-------+ |
| 69 | // +---+-+ | |
| 70 | // +---+ | +----+ | |
| 71 | // | v | | |
| 72 | // | +------+ | | |
| 73 | // | ++ ++<-+ | |
| 74 | // +->| Region | | |
| 75 | // ++ ++ | |
| 76 | // +------+-------+ |
| 77 | // |
| 78 | // bbM in the above is the multiplexer block, and any block previously branching |
| 79 | // to an entry block of the region are redirected to it. This includes any |
| 80 | // branches from within the region. Using a block argument, bbM then dispatches |
| 81 | // to the correct entry block of the region dependent on the predecessor. |
| 82 | // |
| 83 | // A similar transformation is done to create the latch block with the single |
| 84 | // back edge and loop exit edge. |
| 85 | // |
| 86 | // The above form has the advantage that bbM now acts as the loop header |
| 87 | // of the loop body. After the transformation on the latch, this results in a |
| 88 | // structured loop that can then be lifted to structured control flow. The |
| 89 | // conditional branches created in bbM are later lifted to conditional |
| 90 | // branches. |
| 91 | // |
| 92 | // Lifting conditional branches is done by analyzing the *first* conditional |
| 93 | // branch encountered in the entry region. The algorithm then identifies |
| 94 | // all blocks that are dominated by a specific control flow edge and |
| 95 | // the region where control flow continues: |
| 96 | // +-----+ |
| 97 | // +-----+ bb0 +----+ |
| 98 | // v +-----+ v |
| 99 | // Region 1 +-+-+ ... +-+-+ Region n |
| 100 | // +---+ +---+ |
| 101 | // ... ... |
| 102 | // | | |
| 103 | // | +---+ | |
| 104 | // +---->++ ++<---+ |
| 105 | // | | |
| 106 | // ++ ++ Region T |
| 107 | // +---+ |
| 108 | // Every region following bb0 consists of 0 or more blocks that eventually |
| 109 | // branch to Region T. If there are multiple entry blocks into Region T, a |
| 110 | // single entry block is created using a multiplexer block as shown above. |
| 111 | // Region 1 to Region n are then lifted together with the conditional control |
| 112 | // flow operation terminating bb0 into a structured conditional operation |
| 113 | // followed by the operations of the entry block of Region T. |
| 114 | //===----------------------------------------------------------------------===// |
| 115 | |
| 116 | #include "mlir/Transforms/CFGToSCF.h" |
| 117 | |
| 118 | #include "mlir/IR/RegionGraphTraits.h" |
| 119 | #include "mlir/Interfaces/ControlFlowInterfaces.h" |
| 120 | #include "mlir/Interfaces/SideEffectInterfaces.h" |
| 121 | #include "llvm/ADT/DepthFirstIterator.h" |
| 122 | #include "llvm/ADT/MapVector.h" |
| 123 | #include "llvm/ADT/SCCIterator.h" |
| 124 | #include "llvm/ADT/SetVector.h" |
| 125 | #include "llvm/ADT/SmallPtrSet.h" |
| 126 | |
| 127 | using namespace mlir; |
| 128 | |
| 129 | /// Returns the mutable operand range used to transfer operands from `block` to |
| 130 | /// its successor with the given index. The returned range being mutable allows |
| 131 | /// us to modify the operands being transferred. |
| 132 | static MutableOperandRange |
| 133 | getMutableSuccessorOperands(Block *block, unsigned successorIndex) { |
| 134 | auto branchOpInterface = cast<BranchOpInterface>(block->getTerminator()); |
| 135 | SuccessorOperands succOps = |
| 136 | branchOpInterface.getSuccessorOperands(successorIndex); |
| 137 | return succOps.getMutableForwardedOperands(); |
| 138 | } |
| 139 | |
| 140 | /// Return the operand range used to transfer operands from `block` to its |
| 141 | /// successor with the given index. |
| 142 | static OperandRange getSuccessorOperands(Block *block, |
| 143 | unsigned successorIndex) { |
| 144 | return getMutableSuccessorOperands(block, successorIndex); |
| 145 | } |
| 146 | |
| 147 | /// Appends all the block arguments from `other` to the block arguments of |
| 148 | /// `block`, copying their types and locations. |
| 149 | static void addBlockArgumentsFromOther(Block *block, Block *other) { |
| 150 | for (BlockArgument arg : other->getArguments()) |
| 151 | block->addArgument(type: arg.getType(), loc: arg.getLoc()); |
| 152 | } |
| 153 | |
| 154 | namespace { |
| 155 | |
| 156 | /// Class representing an edge in the CFG. Consists of a from-block, a successor |
| 157 | /// and corresponding successor operands passed to the block arguments of the |
| 158 | /// successor. |
| 159 | class Edge { |
| 160 | Block *fromBlock; |
| 161 | unsigned successorIndex; |
| 162 | |
| 163 | public: |
| 164 | /// Constructs a new edge from `fromBlock` to the successor corresponding to |
| 165 | /// `successorIndex`. |
| 166 | Edge(Block *fromBlock, unsigned int successorIndex) |
| 167 | : fromBlock(fromBlock), successorIndex(successorIndex) {} |
| 168 | |
| 169 | /// Returns the from-block. |
| 170 | Block *getFromBlock() const { return fromBlock; } |
| 171 | |
| 172 | /// Returns the successor of the edge. |
| 173 | Block *getSuccessor() const { |
| 174 | return fromBlock->getSuccessor(i: successorIndex); |
| 175 | } |
| 176 | |
| 177 | /// Sets the successor of the edge, adjusting the terminator in the |
| 178 | /// from-block. |
| 179 | void setSuccessor(Block *block) const { |
| 180 | fromBlock->getTerminator()->setSuccessor(block, index: successorIndex); |
| 181 | } |
| 182 | |
| 183 | /// Returns the arguments of this edge that are passed to the block arguments |
| 184 | /// of the successor. |
| 185 | MutableOperandRange getMutableSuccessorOperands() const { |
| 186 | return ::getMutableSuccessorOperands(block: fromBlock, successorIndex); |
| 187 | } |
| 188 | |
| 189 | /// Returns the arguments of this edge that are passed to the block arguments |
| 190 | /// of the successor. |
| 191 | OperandRange getSuccessorOperands() const { |
| 192 | return ::getSuccessorOperands(block: fromBlock, successorIndex); |
| 193 | } |
| 194 | }; |
| 195 | |
| 196 | /// Structure containing the entry, exit and back edges of a cycle. A cycle is a |
| 197 | /// generalization of a loop that may have multiple entry edges. See also |
| 198 | /// https://llvm.org/docs/CycleTerminology.html. |
| 199 | struct CycleEdges { |
| 200 | /// All edges from a block outside the cycle to a block inside the cycle. |
| 201 | /// The targets of these edges are entry blocks. |
| 202 | SmallVector<Edge> entryEdges; |
| 203 | /// All edges from a block inside the cycle to a block outside the cycle. |
| 204 | SmallVector<Edge> exitEdges; |
| 205 | /// All edges from a block inside the cycle to an entry block. |
| 206 | SmallVector<Edge> backEdges; |
| 207 | }; |
| 208 | |
| 209 | /// Class used to orchestrate creation of so-called edge multiplexers. |
| 210 | /// This class creates a new basic block and routes all inputs edges |
| 211 | /// to this basic block before branching to their original target. |
| 212 | /// The purpose of this transformation is to create single-entry, |
| 213 | /// single-exit regions. |
| 214 | class EdgeMultiplexer { |
| 215 | public: |
| 216 | /// Creates a new edge multiplexer capable of redirecting all edges to one of |
| 217 | /// the `entryBlocks`. This creates the multiplexer basic block with |
| 218 | /// appropriate block arguments after the first entry block. `extraArgs` |
| 219 | /// contains the types of possible extra block arguments passed to the |
| 220 | /// multiplexer block that are added to the successor operands of every |
| 221 | /// outgoing edge. |
| 222 | /// |
| 223 | /// NOTE: This does not yet redirect edges to branch to the |
| 224 | /// multiplexer block nor code dispatching from the multiplexer code |
| 225 | /// to the original successors. |
| 226 | /// See `redirectEdge` and `createSwitch`. |
| 227 | static EdgeMultiplexer create(Location loc, ArrayRef<Block *> entryBlocks, |
| 228 | function_ref<Value(unsigned)> getSwitchValue, |
| 229 | function_ref<Value(Type)> getUndefValue, |
| 230 | TypeRange = {}) { |
| 231 | assert(!entryBlocks.empty() && "Require at least one entry block" ); |
| 232 | |
| 233 | auto *multiplexerBlock = new Block; |
| 234 | multiplexerBlock->insertAfter(block: entryBlocks.front()); |
| 235 | |
| 236 | // To implement the multiplexer block, we have to add the block arguments of |
| 237 | // every distinct successor block to the multiplexer block. When redirecting |
| 238 | // edges, block arguments designated for blocks that aren't branched to will |
| 239 | // be assigned the `getUndefValue`. The amount of block arguments and their |
| 240 | // offset is saved in the map for `redirectEdge` to transform the edges. |
| 241 | llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping; |
| 242 | for (Block *entryBlock : entryBlocks) { |
| 243 | auto [iter, inserted] = blockArgMapping.insert( |
| 244 | KV: {entryBlock, multiplexerBlock->getNumArguments()}); |
| 245 | if (inserted) |
| 246 | addBlockArgumentsFromOther(block: multiplexerBlock, other: entryBlock); |
| 247 | } |
| 248 | |
| 249 | // If we have more than one successor, we have to additionally add a |
| 250 | // discriminator value, denoting which successor to jump to. |
| 251 | // When redirecting edges, an appropriate value will be passed using |
| 252 | // `getSwitchValue`. |
| 253 | Value discriminator; |
| 254 | if (blockArgMapping.size() > 1) |
| 255 | discriminator = |
| 256 | multiplexerBlock->addArgument(type: getSwitchValue(0).getType(), loc); |
| 257 | |
| 258 | multiplexerBlock->addArguments( |
| 259 | types: extraArgs, locs: SmallVector<Location>(extraArgs.size(), loc)); |
| 260 | |
| 261 | return EdgeMultiplexer(multiplexerBlock, getSwitchValue, getUndefValue, |
| 262 | std::move(blockArgMapping), discriminator); |
| 263 | } |
| 264 | |
| 265 | /// Returns the created multiplexer block. |
| 266 | Block *getMultiplexerBlock() const { return multiplexerBlock; } |
| 267 | |
| 268 | /// Redirects `edge` to branch to the multiplexer block before continuing to |
| 269 | /// its original target. The edges successor must have originally been part |
| 270 | /// of the entry blocks array passed to the `create` function. `extraArgs` |
| 271 | /// must be used to pass along any additional values corresponding to |
| 272 | /// `extraArgs` in `create`. |
| 273 | void redirectEdge(Edge edge, ValueRange = {}) const { |
| 274 | const auto *result = blockArgMapping.find(Key: edge.getSuccessor()); |
| 275 | assert(result != blockArgMapping.end() && |
| 276 | "Edge was not originally passed to `create` method." ); |
| 277 | |
| 278 | MutableOperandRange successorOperands = edge.getMutableSuccessorOperands(); |
| 279 | |
| 280 | // Extra arguments are always appended at the end of the block arguments. |
| 281 | unsigned = |
| 282 | multiplexerBlock->getNumArguments() - extraArgs.size(); |
| 283 | // If a discriminator exists, it is right before the extra arguments. |
| 284 | std::optional<unsigned> discriminatorIndex = |
| 285 | discriminator ? extraArgsBeginIndex - 1 : std::optional<unsigned>{}; |
| 286 | |
| 287 | SmallVector<Value> newSuccOperands(multiplexerBlock->getNumArguments()); |
| 288 | for (BlockArgument argument : multiplexerBlock->getArguments()) { |
| 289 | unsigned index = argument.getArgNumber(); |
| 290 | if (index >= result->second && |
| 291 | index < result->second + edge.getSuccessor()->getNumArguments()) { |
| 292 | // Original block arguments to the entry block. |
| 293 | newSuccOperands[index] = |
| 294 | successorOperands[index - result->second].get(); |
| 295 | continue; |
| 296 | } |
| 297 | |
| 298 | // Discriminator value if it exists. |
| 299 | if (index == discriminatorIndex) { |
| 300 | newSuccOperands[index] = |
| 301 | getSwitchValue(result - blockArgMapping.begin()); |
| 302 | continue; |
| 303 | } |
| 304 | |
| 305 | // Followed by the extra arguments. |
| 306 | if (index >= extraArgsBeginIndex) { |
| 307 | newSuccOperands[index] = extraArgs[index - extraArgsBeginIndex]; |
| 308 | continue; |
| 309 | } |
| 310 | |
| 311 | // Otherwise undef values for any unused block arguments used by other |
| 312 | // entry blocks. |
| 313 | newSuccOperands[index] = getUndefValue(argument.getType()); |
| 314 | } |
| 315 | |
| 316 | edge.setSuccessor(multiplexerBlock); |
| 317 | successorOperands.assign(values: newSuccOperands); |
| 318 | } |
| 319 | |
| 320 | /// Creates a switch op using `builder` which dispatches to the original |
| 321 | /// successors of the edges passed to `create` minus the ones in `excluded`. |
| 322 | /// The builder's insertion point has to be in a block dominated by the |
| 323 | /// multiplexer block. All edges to the multiplexer block must have already |
| 324 | /// been redirected using `redirectEdge`. |
| 325 | void createSwitch( |
| 326 | Location loc, OpBuilder &builder, CFGToSCFInterface &interface, |
| 327 | const SmallPtrSetImpl<Block *> &excluded = SmallPtrSet<Block *, 1>{}) { |
| 328 | // We create the switch by creating a case for all entries and then |
| 329 | // splitting of the last entry as a default case. |
| 330 | |
| 331 | SmallVector<ValueRange> caseArguments; |
| 332 | SmallVector<unsigned> caseValues; |
| 333 | SmallVector<Block *> caseDestinations; |
| 334 | for (auto &&[index, pair] : llvm::enumerate(First&: blockArgMapping)) { |
| 335 | auto &&[succ, offset] = pair; |
| 336 | if (excluded.contains(Ptr: succ)) |
| 337 | continue; |
| 338 | |
| 339 | caseValues.push_back(Elt: index); |
| 340 | caseArguments.push_back(Elt: multiplexerBlock->getArguments().slice( |
| 341 | N: offset, M: succ->getNumArguments())); |
| 342 | caseDestinations.push_back(Elt: succ); |
| 343 | } |
| 344 | |
| 345 | // If we don't have a discriminator due to only having one entry we have to |
| 346 | // create a dummy flag for the switch. |
| 347 | Value realDiscriminator = discriminator; |
| 348 | if (!realDiscriminator || caseArguments.size() == 1) |
| 349 | realDiscriminator = getSwitchValue(0); |
| 350 | |
| 351 | caseValues.pop_back(); |
| 352 | Block *defaultDest = caseDestinations.pop_back_val(); |
| 353 | ValueRange defaultArgs = caseArguments.pop_back_val(); |
| 354 | |
| 355 | assert(!builder.getInsertionBlock()->hasNoPredecessors() && |
| 356 | "Edges need to be redirected prior to creating switch." ); |
| 357 | interface.createCFGSwitchOp(loc, builder, flag: realDiscriminator, caseValues, |
| 358 | caseDestinations, caseArguments, defaultDest, |
| 359 | defaultArgs); |
| 360 | } |
| 361 | |
| 362 | private: |
| 363 | /// Newly created multiplexer block. |
| 364 | Block *multiplexerBlock; |
| 365 | /// Callback used to create a constant suitable as flag for |
| 366 | /// the interfaces `createCFGSwitchOp`. |
| 367 | function_ref<Value(unsigned)> getSwitchValue; |
| 368 | /// Callback used to create undefined values of a given type. |
| 369 | function_ref<Value(Type)> getUndefValue; |
| 370 | |
| 371 | /// Mapping of the block arguments of an entry block to the corresponding |
| 372 | /// block arguments in the multiplexer block. Block arguments of an entry |
| 373 | /// block are simply appended ot the multiplexer block. This map simply |
| 374 | /// contains the offset to the range in the multiplexer block. |
| 375 | llvm::SmallMapVector<Block *, unsigned, 4> blockArgMapping; |
| 376 | /// Discriminator value used in the multiplexer block to dispatch to the |
| 377 | /// correct entry block. Null value if not required due to only having one |
| 378 | /// entry block. |
| 379 | Value discriminator; |
| 380 | |
| 381 | EdgeMultiplexer(Block *multiplexerBlock, |
| 382 | function_ref<Value(unsigned)> getSwitchValue, |
| 383 | function_ref<Value(Type)> getUndefValue, |
| 384 | llvm::SmallMapVector<Block *, unsigned, 4> &&entries, |
| 385 | Value dispatchFlag) |
| 386 | : multiplexerBlock(multiplexerBlock), getSwitchValue(getSwitchValue), |
| 387 | getUndefValue(getUndefValue), blockArgMapping(std::move(entries)), |
| 388 | discriminator(dispatchFlag) {} |
| 389 | }; |
| 390 | |
| 391 | /// Alternative implementation of DenseMapInfo<Operation*> using the operation |
| 392 | /// equivalence infrastructure to check whether two 'return-like' operations are |
| 393 | /// equivalent in the context of this transformation. This means that both |
| 394 | /// operations are of the same kind, have the same amount of operands and types |
| 395 | /// and the same attributes and properties. The operands themselves don't have |
| 396 | /// to be equivalent. |
| 397 | struct ReturnLikeOpEquivalence : public llvm::DenseMapInfo<Operation *> { |
| 398 | static unsigned getHashValue(const Operation *opC) { |
| 399 | return OperationEquivalence::computeHash( |
| 400 | op: const_cast<Operation *>(opC), |
| 401 | /*hashOperands=*/OperationEquivalence::ignoreHashValue, |
| 402 | /*hashResults=*/OperationEquivalence::ignoreHashValue, |
| 403 | flags: OperationEquivalence::IgnoreLocations); |
| 404 | } |
| 405 | |
| 406 | static bool isEqual(const Operation *lhs, const Operation *rhs) { |
| 407 | if (lhs == rhs) |
| 408 | return true; |
| 409 | if (lhs == getTombstoneKey() || lhs == getEmptyKey() || |
| 410 | rhs == getTombstoneKey() || rhs == getEmptyKey()) |
| 411 | return false; |
| 412 | return OperationEquivalence::isEquivalentTo( |
| 413 | lhs: const_cast<Operation *>(lhs), rhs: const_cast<Operation *>(rhs), |
| 414 | checkEquivalent: OperationEquivalence::ignoreValueEquivalence, markEquivalent: nullptr, |
| 415 | flags: OperationEquivalence::IgnoreLocations); |
| 416 | } |
| 417 | }; |
| 418 | |
| 419 | /// Utility-class for transforming a region to only have one single block for |
| 420 | /// every return-like operation. |
| 421 | class ReturnLikeExitCombiner { |
| 422 | public: |
| 423 | ReturnLikeExitCombiner(Region &topLevelRegion, CFGToSCFInterface &interface) |
| 424 | : topLevelRegion(topLevelRegion), interface(interface) {} |
| 425 | |
| 426 | /// Transforms `returnLikeOp` to a branch to the only block in the |
| 427 | /// region with an instance of `returnLikeOp`s kind. |
| 428 | void combineExit(Operation *returnLikeOp, |
| 429 | function_ref<Value(unsigned)> getSwitchValue) { |
| 430 | auto [iter, inserted] = returnLikeToCombinedExit.try_emplace(Key: returnLikeOp); |
| 431 | if (!inserted && iter->first == returnLikeOp) |
| 432 | return; |
| 433 | |
| 434 | Block *exitBlock = iter->second; |
| 435 | if (inserted) { |
| 436 | exitBlock = new Block; |
| 437 | iter->second = exitBlock; |
| 438 | topLevelRegion.push_back(block: exitBlock); |
| 439 | exitBlock->addArguments( |
| 440 | types: returnLikeOp->getOperandTypes(), |
| 441 | locs: SmallVector<Location>(returnLikeOp->getNumOperands(), |
| 442 | returnLikeOp->getLoc())); |
| 443 | } |
| 444 | |
| 445 | auto builder = OpBuilder::atBlockTerminator(block: returnLikeOp->getBlock()); |
| 446 | interface.createSingleDestinationBranch(loc: returnLikeOp->getLoc(), builder, |
| 447 | dummyFlag: getSwitchValue(0), destination: exitBlock, |
| 448 | arguments: returnLikeOp->getOperands()); |
| 449 | |
| 450 | if (!inserted) { |
| 451 | returnLikeOp->erase(); |
| 452 | return; |
| 453 | } |
| 454 | |
| 455 | returnLikeOp->moveBefore(block: exitBlock, iterator: exitBlock->end()); |
| 456 | returnLikeOp->setOperands(exitBlock->getArguments()); |
| 457 | } |
| 458 | |
| 459 | private: |
| 460 | /// Mapping of return-like operation to block. All return-like operations |
| 461 | /// of the same kind with the same attributes, properties and types are seen |
| 462 | /// as equivalent. First occurrence seen is kept in the map. |
| 463 | llvm::SmallDenseMap<Operation *, Block *, 4, ReturnLikeOpEquivalence> |
| 464 | returnLikeToCombinedExit; |
| 465 | Region &topLevelRegion; |
| 466 | CFGToSCFInterface &interface; |
| 467 | }; |
| 468 | |
| 469 | } // namespace |
| 470 | |
| 471 | /// Returns a range of all edges from `block` to each of its successors. |
| 472 | static auto successorEdges(Block *block) { |
| 473 | return llvm::map_range(C: llvm::seq(Size: block->getNumSuccessors()), |
| 474 | F: [=](unsigned index) { return Edge(block, index); }); |
| 475 | } |
| 476 | |
| 477 | /// Calculates entry, exit and back edges of the given cycle. |
| 478 | static CycleEdges |
| 479 | calculateCycleEdges(const llvm::SmallSetVector<Block *, 4> &cycles) { |
| 480 | CycleEdges result; |
| 481 | SmallPtrSet<Block *, 8> entryBlocks; |
| 482 | |
| 483 | // First identify all exit and entry edges by checking whether any successors |
| 484 | // or predecessors are from outside the cycles. |
| 485 | for (Block *block : cycles) { |
| 486 | for (auto pred = block->pred_begin(); pred != block->pred_end(); pred++) { |
| 487 | if (cycles.contains(key: *pred)) |
| 488 | continue; |
| 489 | |
| 490 | result.entryEdges.emplace_back(Args: *pred, Args: pred.getSuccessorIndex()); |
| 491 | entryBlocks.insert(Ptr: block); |
| 492 | } |
| 493 | |
| 494 | for (auto &&[succIndex, succ] : llvm::enumerate(First: block->getSuccessors())) { |
| 495 | if (cycles.contains(key: succ)) |
| 496 | continue; |
| 497 | |
| 498 | result.exitEdges.emplace_back(Args&: block, Args&: succIndex); |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | // With the entry blocks identified, find all the back edges. |
| 503 | for (Block *block : cycles) { |
| 504 | for (auto &&[succIndex, succ] : llvm::enumerate(First: block->getSuccessors())) { |
| 505 | if (!entryBlocks.contains(Ptr: succ)) |
| 506 | continue; |
| 507 | |
| 508 | result.backEdges.emplace_back(Args&: block, Args&: succIndex); |
| 509 | } |
| 510 | } |
| 511 | |
| 512 | return result; |
| 513 | } |
| 514 | |
| 515 | /// Creates a single entry block out of multiple entry edges using an edge |
| 516 | /// multiplexer and returns it. |
| 517 | static EdgeMultiplexer |
| 518 | createSingleEntryBlock(Location loc, ArrayRef<Edge> entryEdges, |
| 519 | function_ref<Value(unsigned)> getSwitchValue, |
| 520 | function_ref<Value(Type)> getUndefValue, |
| 521 | CFGToSCFInterface &interface) { |
| 522 | auto result = EdgeMultiplexer::create( |
| 523 | loc, entryBlocks: llvm::map_to_vector(C&: entryEdges, F: std::mem_fn(pm: &Edge::getSuccessor)), |
| 524 | getSwitchValue, getUndefValue); |
| 525 | |
| 526 | // Redirect the edges prior to creating the switch op. |
| 527 | // We guarantee that predecessors are up to date. |
| 528 | for (Edge edge : entryEdges) |
| 529 | result.redirectEdge(edge); |
| 530 | |
| 531 | auto builder = OpBuilder::atBlockBegin(block: result.getMultiplexerBlock()); |
| 532 | result.createSwitch(loc, builder, interface); |
| 533 | |
| 534 | return result; |
| 535 | } |
| 536 | |
| 537 | namespace { |
| 538 | /// Special loop properties of a structured loop. |
| 539 | /// A structured loop is a loop satisfying all of the following: |
| 540 | /// * Has at most one entry, one exit and one back edge. |
| 541 | /// * The back edge originates from the same block as the exit edge. |
| 542 | struct StructuredLoopProperties { |
| 543 | /// Block containing both the single exit edge and the single back edge. |
| 544 | Block *latch; |
| 545 | /// Loop condition of type equal to a value returned by `getSwitchValue`. |
| 546 | Value condition; |
| 547 | /// Exit block which is the only successor of the loop. |
| 548 | Block *exitBlock; |
| 549 | }; |
| 550 | } // namespace |
| 551 | |
| 552 | /// Transforms a loop into a structured loop with only a single back edge and |
| 553 | /// exiting edge, originating from the same block. |
| 554 | static FailureOr<StructuredLoopProperties> createSingleExitingLatch( |
| 555 | Location loc, ArrayRef<Edge> backEdges, ArrayRef<Edge> exitEdges, |
| 556 | function_ref<Value(unsigned)> getSwitchValue, |
| 557 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
| 558 | ReturnLikeExitCombiner &exitCombiner) { |
| 559 | assert(llvm::all_equal( |
| 560 | llvm::map_range(backEdges, std::mem_fn(&Edge::getSuccessor))) && |
| 561 | "All repetition edges must lead to the single loop header" ); |
| 562 | |
| 563 | // First create the multiplexer block, which will be our latch, for all back |
| 564 | // edges and exit edges. We pass an additional argument to the multiplexer |
| 565 | // block which indicates whether the latch was reached from what was |
| 566 | // originally a back edge or an exit block. |
| 567 | // This is later used to branch using the new only back edge. |
| 568 | SmallVector<Block *> successors; |
| 569 | llvm::append_range( |
| 570 | C&: successors, R: llvm::map_range(C&: backEdges, F: std::mem_fn(pm: &Edge::getSuccessor))); |
| 571 | llvm::append_range( |
| 572 | C&: successors, R: llvm::map_range(C&: exitEdges, F: std::mem_fn(pm: &Edge::getSuccessor))); |
| 573 | auto multiplexer = |
| 574 | EdgeMultiplexer::create(loc, entryBlocks: successors, getSwitchValue, getUndefValue, |
| 575 | /*extraArgs=*/getSwitchValue(0).getType()); |
| 576 | |
| 577 | auto *latchBlock = multiplexer.getMultiplexerBlock(); |
| 578 | |
| 579 | // Create a separate exit block that comes right after the latch. |
| 580 | auto *exitBlock = new Block; |
| 581 | exitBlock->insertAfter(block: latchBlock); |
| 582 | |
| 583 | // Since this is a loop, all back edges point to the same loop header. |
| 584 | Block * = backEdges.front().getSuccessor(); |
| 585 | |
| 586 | // Redirect the edges prior to creating the switch op. |
| 587 | // We guarantee that predecessors are up to date. |
| 588 | |
| 589 | // Redirecting back edges with `shouldRepeat` as 1. |
| 590 | for (Edge backEdge : backEdges) |
| 591 | multiplexer.redirectEdge(edge: backEdge, /*extraArgs=*/getSwitchValue(1)); |
| 592 | |
| 593 | // Redirecting exits edges with `shouldRepeat` as 0. |
| 594 | for (Edge exitEdge : exitEdges) |
| 595 | multiplexer.redirectEdge(edge: exitEdge, /*extraArgs=*/getSwitchValue(0)); |
| 596 | |
| 597 | // Create the new only back edge to the loop header. Branch to the |
| 598 | // exit block otherwise. |
| 599 | Value shouldRepeat = latchBlock->getArguments().back(); |
| 600 | { |
| 601 | auto builder = OpBuilder::atBlockBegin(block: latchBlock); |
| 602 | interface.createConditionalBranch( |
| 603 | loc, builder, condition: shouldRepeat, trueDest: loopHeader, |
| 604 | trueArgs: latchBlock->getArguments().take_front(N: loopHeader->getNumArguments()), |
| 605 | /*falseDest=*/exitBlock, |
| 606 | /*falseArgs=*/{}); |
| 607 | } |
| 608 | |
| 609 | { |
| 610 | auto builder = OpBuilder::atBlockBegin(block: exitBlock); |
| 611 | if (!exitEdges.empty()) { |
| 612 | // Create the switch dispatching to what were originally the multiple exit |
| 613 | // blocks. The loop header has to explicitly be excluded in the below |
| 614 | // switch as we would otherwise be creating a new loop again. All back |
| 615 | // edges leading to the loop header have already been handled in the |
| 616 | // switch above. The remaining edges can only jump to blocks outside the |
| 617 | // loop. |
| 618 | |
| 619 | SmallPtrSet<Block *, 1> excluded = {loopHeader}; |
| 620 | multiplexer.createSwitch(loc, builder, interface, excluded); |
| 621 | } else { |
| 622 | // A loop without an exit edge is a statically known infinite loop. |
| 623 | // Since structured control flow ops are not terminator ops, the caller |
| 624 | // has to create a fitting return-like unreachable terminator operation. |
| 625 | FailureOr<Operation *> terminator = interface.createUnreachableTerminator( |
| 626 | loc, builder, region&: *latchBlock->getParent()); |
| 627 | if (failed(Result: terminator)) |
| 628 | return failure(); |
| 629 | // Transform the just created transform operation in the case that an |
| 630 | // occurrence of it existed in input IR. |
| 631 | exitCombiner.combineExit(returnLikeOp: *terminator, getSwitchValue); |
| 632 | } |
| 633 | } |
| 634 | |
| 635 | return StructuredLoopProperties{.latch: latchBlock, /*condition=*/shouldRepeat, |
| 636 | .exitBlock: exitBlock}; |
| 637 | } |
| 638 | |
| 639 | /// Transforms a structured loop into a loop in reduce form. |
| 640 | /// |
| 641 | /// Reduce form is defined as a structured loop where: |
| 642 | /// (0) No values defined within the loop body are used outside the loop body. |
| 643 | /// (1) The block arguments and successor operands of the exit block are equal |
| 644 | /// to the block arguments of the loop header and the successor operands |
| 645 | /// of the back edge. |
| 646 | /// |
| 647 | /// This is required for many structured control flow ops as they tend |
| 648 | /// to not have separate "loop result arguments" and "loop iteration arguments" |
| 649 | /// at the end of the block. Rather, the "loop iteration arguments" from the |
| 650 | /// last iteration are the result of the loop. |
| 651 | /// |
| 652 | /// Note that the requirement of (0) is shared with LCSSA form in LLVM. However, |
| 653 | /// due to this being a structured loop instead of a general loop, we do not |
| 654 | /// require complicated dominance algorithms nor SSA updating making this |
| 655 | /// implementation easier than creating a generic LCSSA transformation pass. |
| 656 | static SmallVector<Value> |
| 657 | transformToReduceLoop(Block *, Block *exitBlock, |
| 658 | const llvm::SmallSetVector<Block *, 4> &loopBlocks, |
| 659 | function_ref<Value(Type)> getUndefValue, |
| 660 | DominanceInfo &dominanceInfo) { |
| 661 | Block *latch = exitBlock->getSinglePredecessor(); |
| 662 | assert(latch && |
| 663 | "Exit block must have only latch as predecessor at this point" ); |
| 664 | assert(exitBlock->getNumArguments() == 0 && |
| 665 | "Exit block mustn't have any block arguments at this point" ); |
| 666 | |
| 667 | unsigned = 0; |
| 668 | unsigned exitBlockIndex = 1; |
| 669 | if (latch->getSuccessor(i: loopHeaderIndex) != loopHeader) |
| 670 | std::swap(a&: loopHeaderIndex, b&: exitBlockIndex); |
| 671 | |
| 672 | assert(latch->getSuccessor(loopHeaderIndex) == loopHeader); |
| 673 | assert(latch->getSuccessor(exitBlockIndex) == exitBlock); |
| 674 | |
| 675 | MutableOperandRange exitBlockSuccessorOperands = |
| 676 | getMutableSuccessorOperands(block: latch, successorIndex: exitBlockIndex); |
| 677 | // Save the values as a vector, not a `MutableOperandRange` as the latter gets |
| 678 | // invalidated when mutating the operands through a different |
| 679 | // `MutableOperandRange` of the same operation. |
| 680 | SmallVector<Value> loopHeaderSuccessorOperands = |
| 681 | llvm::to_vector(Range: getSuccessorOperands(block: latch, successorIndex: loopHeaderIndex)); |
| 682 | |
| 683 | // Add all values used in the next iteration to the exit block. Replace |
| 684 | // any uses that are outside the loop with the newly created exit block. |
| 685 | for (Value arg : loopHeaderSuccessorOperands) { |
| 686 | BlockArgument exitArg = exitBlock->addArgument(type: arg.getType(), loc: arg.getLoc()); |
| 687 | exitBlockSuccessorOperands.append(values: arg); |
| 688 | arg.replaceUsesWithIf(newValue: exitArg, shouldReplace: [&](OpOperand &use) { |
| 689 | return !loopBlocks.contains(key: use.getOwner()->getBlock()); |
| 690 | }); |
| 691 | } |
| 692 | |
| 693 | // Loop below might add block arguments to the latch and loop header. |
| 694 | // Save the block arguments prior to the loop to not process these. |
| 695 | SmallVector<BlockArgument> latchBlockArgumentsPrior = |
| 696 | llvm::to_vector(Range: latch->getArguments()); |
| 697 | SmallVector<BlockArgument> = |
| 698 | llvm::to_vector(Range: loopHeader->getArguments()); |
| 699 | |
| 700 | // Go over all values defined within the loop body. If any of them are used |
| 701 | // outside the loop body, create a block argument on the exit block and loop |
| 702 | // header and replace the outside uses with the exit block argument. |
| 703 | // The loop header block argument is added to satisfy requirement (1) in the |
| 704 | // reduce form condition. |
| 705 | for (Block *loopBlock : loopBlocks) { |
| 706 | // Cache dominance queries for loopBlock. |
| 707 | // There are likely to be many duplicate queries as there can be many value |
| 708 | // definitions within a block. |
| 709 | llvm::SmallDenseMap<Block *, bool> dominanceCache; |
| 710 | // Returns true if `loopBlock` dominates `block`. |
| 711 | auto loopBlockDominates = [&](Block *block) { |
| 712 | auto [iter, inserted] = dominanceCache.try_emplace(Key: block); |
| 713 | if (!inserted) |
| 714 | return iter->second; |
| 715 | iter->second = dominanceInfo.dominates(a: loopBlock, b: block); |
| 716 | return iter->second; |
| 717 | }; |
| 718 | |
| 719 | auto checkValue = [&](Value value) { |
| 720 | Value blockArgument; |
| 721 | for (OpOperand &use : llvm::make_early_inc_range(Range: value.getUses())) { |
| 722 | // Go through all the parent blocks and find the one part of the region |
| 723 | // of the loop. If the block is part of the loop, then the value does |
| 724 | // not escape the loop through this use. |
| 725 | Block *currBlock = use.getOwner()->getBlock(); |
| 726 | while (currBlock && currBlock->getParent() != loopHeader->getParent()) |
| 727 | currBlock = currBlock->getParentOp()->getBlock(); |
| 728 | if (loopBlocks.contains(key: currBlock)) |
| 729 | continue; |
| 730 | |
| 731 | // Block argument is only created the first time it is required. |
| 732 | if (!blockArgument) { |
| 733 | blockArgument = |
| 734 | exitBlock->addArgument(type: value.getType(), loc: value.getLoc()); |
| 735 | loopHeader->addArgument(type: value.getType(), loc: value.getLoc()); |
| 736 | |
| 737 | // `value` might be defined in a block that does not dominate `latch` |
| 738 | // but previously dominated an exit block with a use. |
| 739 | // In this case, add a block argument to the latch and go through all |
| 740 | // predecessors. If the value dominates the predecessor, pass the |
| 741 | // value as a successor operand, otherwise pass undef. |
| 742 | // The above is unnecessary if the value is a block argument of the |
| 743 | // latch or if `value` dominates all predecessors. |
| 744 | Value argument = value; |
| 745 | if (value.getParentBlock() != latch && |
| 746 | llvm::any_of(Range: latch->getPredecessors(), P: [&](Block *pred) { |
| 747 | return !loopBlockDominates(pred); |
| 748 | })) { |
| 749 | argument = latch->addArgument(type: value.getType(), loc: value.getLoc()); |
| 750 | for (auto iter = latch->pred_begin(); iter != latch->pred_end(); |
| 751 | ++iter) { |
| 752 | Value succOperand = value; |
| 753 | if (!loopBlockDominates(*iter)) |
| 754 | succOperand = getUndefValue(value.getType()); |
| 755 | |
| 756 | getMutableSuccessorOperands(block: *iter, successorIndex: iter.getSuccessorIndex()) |
| 757 | .append(values: succOperand); |
| 758 | } |
| 759 | } |
| 760 | |
| 761 | loopHeaderSuccessorOperands.push_back(Elt: argument); |
| 762 | for (Edge edge : successorEdges(block: latch)) |
| 763 | edge.getMutableSuccessorOperands().append(values: argument); |
| 764 | } |
| 765 | |
| 766 | use.set(blockArgument); |
| 767 | } |
| 768 | }; |
| 769 | |
| 770 | if (loopBlock == latch) |
| 771 | llvm::for_each(Range&: latchBlockArgumentsPrior, F: checkValue); |
| 772 | else if (loopBlock == loopHeader) |
| 773 | llvm::for_each(Range&: loopHeaderArgumentsPrior, F: checkValue); |
| 774 | else |
| 775 | llvm::for_each(Range: loopBlock->getArguments(), F: checkValue); |
| 776 | |
| 777 | for (Operation &op : *loopBlock) |
| 778 | llvm::for_each(Range: op.getResults(), F: checkValue); |
| 779 | } |
| 780 | |
| 781 | // New block arguments may have been added to the loop header. |
| 782 | // Adjust the entry edges to pass undef values to these. |
| 783 | for (auto iter = loopHeader->pred_begin(); iter != loopHeader->pred_end(); |
| 784 | ++iter) { |
| 785 | // Latch successor arguments have already been handled. |
| 786 | if (*iter == latch) |
| 787 | continue; |
| 788 | |
| 789 | MutableOperandRange succOps = |
| 790 | getMutableSuccessorOperands(block: *iter, successorIndex: iter.getSuccessorIndex()); |
| 791 | succOps.append(values: llvm::map_to_vector( |
| 792 | C: loopHeader->getArguments().drop_front(N: succOps.size()), |
| 793 | F: [&](BlockArgument arg) { return getUndefValue(arg.getType()); })); |
| 794 | } |
| 795 | |
| 796 | return loopHeaderSuccessorOperands; |
| 797 | } |
| 798 | |
| 799 | /// Transforms all outer-most cycles in the region with the region entry |
| 800 | /// `regionEntry` into structured loops. Returns the entry blocks of any newly |
| 801 | /// created regions potentially requiring further transformations. |
| 802 | static FailureOr<SmallVector<Block *>> transformCyclesToSCFLoops( |
| 803 | Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue, |
| 804 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
| 805 | DominanceInfo &dominanceInfo, ReturnLikeExitCombiner &exitCombiner) { |
| 806 | SmallVector<Block *> newSubRegions; |
| 807 | auto scc = llvm::scc_begin(G: regionEntry); |
| 808 | while (!scc.isAtEnd()) { |
| 809 | if (!scc.hasCycle()) { |
| 810 | ++scc; |
| 811 | continue; |
| 812 | } |
| 813 | |
| 814 | // Save the set and increment the SCC iterator early to avoid our |
| 815 | // modifications breaking the SCC iterator. |
| 816 | llvm::SmallSetVector<Block *, 4> cycleBlockSet(scc->begin(), scc->end()); |
| 817 | ++scc; |
| 818 | |
| 819 | CycleEdges edges = calculateCycleEdges(cycles: cycleBlockSet); |
| 820 | Block * = edges.entryEdges.front().getSuccessor(); |
| 821 | // First turn the cycle into a loop by creating a single entry block if |
| 822 | // needed. |
| 823 | if (edges.entryEdges.size() > 1) { |
| 824 | SmallVector<Edge> edgesToEntryBlocks; |
| 825 | llvm::append_range(C&: edgesToEntryBlocks, R&: edges.entryEdges); |
| 826 | llvm::append_range(C&: edgesToEntryBlocks, R&: edges.backEdges); |
| 827 | |
| 828 | EdgeMultiplexer multiplexer = createSingleEntryBlock( |
| 829 | loc: loopHeader->getTerminator()->getLoc(), entryEdges: edgesToEntryBlocks, |
| 830 | getSwitchValue, getUndefValue, interface); |
| 831 | |
| 832 | loopHeader = multiplexer.getMultiplexerBlock(); |
| 833 | } |
| 834 | cycleBlockSet.insert(X: loopHeader); |
| 835 | |
| 836 | // Then turn it into a structured loop by creating a single latch. |
| 837 | FailureOr<StructuredLoopProperties> loopProperties = |
| 838 | createSingleExitingLatch( |
| 839 | loc: edges.backEdges.front().getFromBlock()->getTerminator()->getLoc(), |
| 840 | backEdges: edges.backEdges, exitEdges: edges.exitEdges, getSwitchValue, getUndefValue, |
| 841 | interface, exitCombiner); |
| 842 | if (failed(Result: loopProperties)) |
| 843 | return failure(); |
| 844 | |
| 845 | Block *latchBlock = loopProperties->latch; |
| 846 | Block *exitBlock = loopProperties->exitBlock; |
| 847 | cycleBlockSet.insert(X: latchBlock); |
| 848 | cycleBlockSet.insert(X: loopHeader); |
| 849 | |
| 850 | // Finally, turn it into reduce form. |
| 851 | SmallVector<Value> iterationValues = transformToReduceLoop( |
| 852 | loopHeader, exitBlock, loopBlocks: cycleBlockSet, getUndefValue, dominanceInfo); |
| 853 | |
| 854 | // Create a block acting as replacement for the loop header and insert |
| 855 | // the structured loop into it. |
| 856 | auto *newLoopParentBlock = new Block; |
| 857 | newLoopParentBlock->insertBefore(block: loopHeader); |
| 858 | addBlockArgumentsFromOther(block: newLoopParentBlock, other: loopHeader); |
| 859 | |
| 860 | Region::BlockListType &blocks = regionEntry->getParent()->getBlocks(); |
| 861 | Region loopBody; |
| 862 | // Make sure the loop header is the entry block. |
| 863 | loopBody.push_back(block: blocks.remove(IT: loopHeader)); |
| 864 | for (Block *block : cycleBlockSet) |
| 865 | if (block != latchBlock && block != loopHeader) |
| 866 | loopBody.push_back(block: blocks.remove(IT: block)); |
| 867 | // And the latch is the last block. |
| 868 | loopBody.push_back(block: blocks.remove(IT: latchBlock)); |
| 869 | |
| 870 | Operation *oldTerminator = latchBlock->getTerminator(); |
| 871 | oldTerminator->remove(); |
| 872 | |
| 873 | auto builder = OpBuilder::atBlockBegin(block: newLoopParentBlock); |
| 874 | FailureOr<Operation *> structuredLoopOp = |
| 875 | interface.createStructuredDoWhileLoopOp( |
| 876 | builder, replacedOp: oldTerminator, loopValuesInit: newLoopParentBlock->getArguments(), |
| 877 | condition: loopProperties->condition, loopValuesNextIter: iterationValues, loopBody: std::move(loopBody)); |
| 878 | if (failed(Result: structuredLoopOp)) |
| 879 | return failure(); |
| 880 | oldTerminator->erase(); |
| 881 | |
| 882 | newSubRegions.push_back(Elt: loopHeader); |
| 883 | |
| 884 | for (auto &&[oldValue, newValue] : llvm::zip( |
| 885 | t: exitBlock->getArguments(), u: (*structuredLoopOp)->getResults())) |
| 886 | oldValue.replaceAllUsesWith(newValue); |
| 887 | |
| 888 | loopHeader->replaceAllUsesWith(newValue&: newLoopParentBlock); |
| 889 | // Merge the exit block right after the loop operation. |
| 890 | newLoopParentBlock->getOperations().splice(where: newLoopParentBlock->end(), |
| 891 | L2&: exitBlock->getOperations()); |
| 892 | exitBlock->erase(); |
| 893 | } |
| 894 | return newSubRegions; |
| 895 | } |
| 896 | |
| 897 | /// Makes sure the branch region only has a single exit. This is required by the |
| 898 | /// recursive part of the algorithm, as it expects the CFG to be single-entry |
| 899 | /// and single-exit. This is done by simply creating an empty block if there |
| 900 | /// is more than one block with an edge to the continuation block. All blocks |
| 901 | /// with edges to the continuation are then redirected to this block. A region |
| 902 | /// terminator is later placed into the block. |
| 903 | static void createSingleExitBranchRegion( |
| 904 | ArrayRef<Block *> branchRegion, Block *continuation, |
| 905 | SmallVectorImpl<std::pair<Block *, SmallVector<Value>>> &createdEmptyBlocks, |
| 906 | Region &conditionalRegion) { |
| 907 | Block *singleExitBlock = nullptr; |
| 908 | std::optional<Edge> previousEdgeToContinuation; |
| 909 | Region::BlockListType &parentBlockList = |
| 910 | branchRegion.front()->getParent()->getBlocks(); |
| 911 | for (Block *block : branchRegion) { |
| 912 | for (Edge edge : successorEdges(block)) { |
| 913 | if (edge.getSuccessor() != continuation) |
| 914 | continue; |
| 915 | |
| 916 | if (!previousEdgeToContinuation) { |
| 917 | previousEdgeToContinuation = edge; |
| 918 | continue; |
| 919 | } |
| 920 | |
| 921 | // If this is not the first edge to the continuation we create the |
| 922 | // single exit block and redirect the edges. |
| 923 | if (!singleExitBlock) { |
| 924 | singleExitBlock = new Block; |
| 925 | addBlockArgumentsFromOther(block: singleExitBlock, other: continuation); |
| 926 | previousEdgeToContinuation->setSuccessor(singleExitBlock); |
| 927 | createdEmptyBlocks.emplace_back(Args&: singleExitBlock, |
| 928 | Args: singleExitBlock->getArguments()); |
| 929 | } |
| 930 | |
| 931 | edge.setSuccessor(singleExitBlock); |
| 932 | } |
| 933 | |
| 934 | conditionalRegion.push_back(block: parentBlockList.remove(IT: block)); |
| 935 | } |
| 936 | |
| 937 | if (singleExitBlock) |
| 938 | conditionalRegion.push_back(block: singleExitBlock); |
| 939 | } |
| 940 | |
| 941 | /// Returns true if this block is an exit block of the region. |
| 942 | static bool isRegionExitBlock(Block *block) { |
| 943 | return block->getNumSuccessors() == 0; |
| 944 | } |
| 945 | |
| 946 | /// Transforms the first occurrence of conditional control flow in `regionEntry` |
| 947 | /// into conditionally executed regions. Returns the entry block of the created |
| 948 | /// regions and the region after the conditional control flow. |
| 949 | static FailureOr<SmallVector<Block *>> transformToStructuredCFBranches( |
| 950 | Block *regionEntry, function_ref<Value(unsigned)> getSwitchValue, |
| 951 | function_ref<Value(Type)> getUndefValue, CFGToSCFInterface &interface, |
| 952 | DominanceInfo &dominanceInfo) { |
| 953 | // Trivial region. |
| 954 | if (regionEntry->getNumSuccessors() == 0) |
| 955 | return SmallVector<Block *>{}; |
| 956 | |
| 957 | if (regionEntry->getNumSuccessors() == 1) { |
| 958 | // Single successor we can just splice together. |
| 959 | Block *successor = regionEntry->getSuccessor(i: 0); |
| 960 | for (auto &&[oldValue, newValue] : llvm::zip( |
| 961 | t: successor->getArguments(), u: getSuccessorOperands(block: regionEntry, successorIndex: 0))) |
| 962 | oldValue.replaceAllUsesWith(newValue); |
| 963 | regionEntry->getTerminator()->erase(); |
| 964 | |
| 965 | regionEntry->getOperations().splice(where: regionEntry->end(), |
| 966 | L2&: successor->getOperations()); |
| 967 | successor->erase(); |
| 968 | return SmallVector<Block *>{regionEntry}; |
| 969 | } |
| 970 | |
| 971 | // Split the CFG into "#numSuccessor + 1" regions. |
| 972 | // For every edge to a successor, the blocks it solely dominates are |
| 973 | // determined and become the region following that edge. |
| 974 | // The last region is the continuation that follows the branch regions. |
| 975 | SmallPtrSet<Block *, 8> notContinuation; |
| 976 | notContinuation.insert(Ptr: regionEntry); |
| 977 | SmallVector<SmallVector<Block *>> successorBranchRegions( |
| 978 | regionEntry->getNumSuccessors()); |
| 979 | for (auto &&[blockList, succ] : |
| 980 | llvm::zip(t&: successorBranchRegions, u: regionEntry->getSuccessors())) { |
| 981 | // If the region entry is not the only predecessor, then the edge does not |
| 982 | // dominate the block it leads to. |
| 983 | if (succ->getSinglePredecessor() != regionEntry) |
| 984 | continue; |
| 985 | |
| 986 | // Otherwise get all blocks it dominates in DFS/pre-order. |
| 987 | DominanceInfoNode *node = dominanceInfo.getNode(a: succ); |
| 988 | for (DominanceInfoNode *curr : llvm::depth_first(G: node)) { |
| 989 | blockList.push_back(Elt: curr->getBlock()); |
| 990 | notContinuation.insert(Ptr: curr->getBlock()); |
| 991 | } |
| 992 | } |
| 993 | |
| 994 | // Finds all relevant edges and checks the shape of the control flow graph at |
| 995 | // this point. |
| 996 | // Branch regions may either: |
| 997 | // * Be post-dominated by the continuation |
| 998 | // * Be post-dominated by a return-like op |
| 999 | // * Dominate a return-like op and have an edge to the continuation. |
| 1000 | // |
| 1001 | // The control flow graph may then be one of three cases: |
| 1002 | // 1) All branch regions are post-dominated by the continuation. This is the |
| 1003 | // usual case. If there are multiple entry blocks into the continuation a |
| 1004 | // single entry block has to be created. A structured control flow op |
| 1005 | // can then be created from the branch regions. |
| 1006 | // |
| 1007 | // 2) No branch region has an edge to a continuation: |
| 1008 | // +-----+ |
| 1009 | // +-----+ bb0 +----+ |
| 1010 | // v +-----+ v |
| 1011 | // Region 1 +-+--+ ... +-+--+ Region n |
| 1012 | // |ret1| |ret2| |
| 1013 | // +----+ +----+ |
| 1014 | // |
| 1015 | // This can only occur if every region ends with a different kind of |
| 1016 | // return-like op. In that case the control flow operation must stay as we are |
| 1017 | // unable to create a single exit-block. We can nevertheless process all its |
| 1018 | // successors as they single-entry, single-exit regions. |
| 1019 | // |
| 1020 | // 3) Only some branch regions are post-dominated by the continuation. |
| 1021 | // The other branch regions may either be post-dominated by a return-like op |
| 1022 | // or lead to either the continuation or return-like op. |
| 1023 | // In this case we also create a single entry block like in 1) that also |
| 1024 | // includes all edges to the return-like op: |
| 1025 | // +-----+ |
| 1026 | // +-----+ bb0 +----+ |
| 1027 | // v +-----+ v |
| 1028 | // Region 1 +-+-+ ... +-+-+ Region n |
| 1029 | // +---+ +---+ |
| 1030 | // +---+ |... ... |
| 1031 | // |ret|<-+ | | |
| 1032 | // +---+ | +---+ | |
| 1033 | // +---->++ ++<---+ |
| 1034 | // | | |
| 1035 | // ++ ++ Region T |
| 1036 | // +---+ |
| 1037 | // This transforms to: |
| 1038 | // +-----+ |
| 1039 | // +-----+ bb0 +----+ |
| 1040 | // v +-----+ v |
| 1041 | // Region 1 +-+-+ ... +-+-+ Region n |
| 1042 | // +---+ +---+ |
| 1043 | // ... +-----+ ... |
| 1044 | // +---->+ bbM +<---+ |
| 1045 | // +-----+ |
| 1046 | // +-----+ | |
| 1047 | // | v |
| 1048 | // +---+ | +---+ |
| 1049 | // |ret+<---+ ++ ++ |
| 1050 | // +---+ | | |
| 1051 | // ++ ++ Region T |
| 1052 | // +---+ |
| 1053 | // |
| 1054 | // bb0 to bbM is now a single-entry, single-exit region that applies to case |
| 1055 | // 1). The control flow op at the end of bbM will trigger case 2. |
| 1056 | SmallVector<Edge> continuationEdges; |
| 1057 | bool continuationPostDominatesAllRegions = true; |
| 1058 | bool noSuccessorHasContinuationEdge = true; |
| 1059 | for (auto &&[entryEdge, branchRegion] : |
| 1060 | llvm::zip(t: successorEdges(block: regionEntry), u&: successorBranchRegions)) { |
| 1061 | |
| 1062 | // If the branch region is empty then the branch target itself is part of |
| 1063 | // the continuation. |
| 1064 | if (branchRegion.empty()) { |
| 1065 | continuationEdges.push_back(Elt: entryEdge); |
| 1066 | noSuccessorHasContinuationEdge = false; |
| 1067 | continue; |
| 1068 | } |
| 1069 | |
| 1070 | for (Block *block : branchRegion) { |
| 1071 | if (isRegionExitBlock(block)) { |
| 1072 | // If a return-like op is part of the branch region then the |
| 1073 | // continuation no longer post-dominates the branch region. |
| 1074 | // Add all its incoming edges to edge list to create the single-exit |
| 1075 | // block for all branch regions. |
| 1076 | continuationPostDominatesAllRegions = false; |
| 1077 | for (auto iter = block->pred_begin(); iter != block->pred_end(); |
| 1078 | ++iter) { |
| 1079 | continuationEdges.emplace_back(Args: *iter, Args: iter.getSuccessorIndex()); |
| 1080 | } |
| 1081 | continue; |
| 1082 | } |
| 1083 | |
| 1084 | for (Edge edge : successorEdges(block)) { |
| 1085 | if (notContinuation.contains(Ptr: edge.getSuccessor())) |
| 1086 | continue; |
| 1087 | |
| 1088 | continuationEdges.push_back(Elt: edge); |
| 1089 | noSuccessorHasContinuationEdge = false; |
| 1090 | } |
| 1091 | } |
| 1092 | } |
| 1093 | |
| 1094 | // case 2) Keep the control flow op but process its successors further. |
| 1095 | if (noSuccessorHasContinuationEdge) |
| 1096 | return llvm::to_vector(Range: regionEntry->getSuccessors()); |
| 1097 | |
| 1098 | Block *continuation = llvm::find_singleton<Block>( |
| 1099 | Range&: continuationEdges, P: [](Edge edge, bool) { return edge.getSuccessor(); }, |
| 1100 | /*AllowRepeats=*/true); |
| 1101 | |
| 1102 | // In case 3) or if not all continuation edges have the same entry block, |
| 1103 | // create a single entry block as continuation for all branch regions. |
| 1104 | if (!continuation || !continuationPostDominatesAllRegions) { |
| 1105 | EdgeMultiplexer multiplexer = createSingleEntryBlock( |
| 1106 | loc: continuationEdges.front().getFromBlock()->getTerminator()->getLoc(), |
| 1107 | entryEdges: continuationEdges, getSwitchValue, getUndefValue, interface); |
| 1108 | continuation = multiplexer.getMultiplexerBlock(); |
| 1109 | } |
| 1110 | |
| 1111 | // Trigger reprocess of case 3) after creating the single entry block. |
| 1112 | if (!continuationPostDominatesAllRegions) { |
| 1113 | // Unlike in the general case, we are explicitly revisiting the same region |
| 1114 | // entry again after having changed its control flow edges and dominance. |
| 1115 | // We have to therefore explicitly invalidate the dominance tree. |
| 1116 | dominanceInfo.invalidate(region: regionEntry->getParent()); |
| 1117 | return SmallVector<Block *>{regionEntry}; |
| 1118 | } |
| 1119 | |
| 1120 | SmallVector<Block *> newSubRegions; |
| 1121 | |
| 1122 | // Empty blocks with the values they return to the parent op. |
| 1123 | SmallVector<std::pair<Block *, SmallVector<Value>>> createdEmptyBlocks; |
| 1124 | |
| 1125 | // Create the branch regions. |
| 1126 | std::vector<Region> conditionalRegions(successorBranchRegions.size()); |
| 1127 | for (auto &&[branchRegion, entryEdge, conditionalRegion] : |
| 1128 | llvm::zip(t&: successorBranchRegions, u: successorEdges(block: regionEntry), |
| 1129 | args&: conditionalRegions)) { |
| 1130 | if (branchRegion.empty()) { |
| 1131 | // If no block is part of the branch region, we create a dummy block to |
| 1132 | // place the region terminator into. |
| 1133 | createdEmptyBlocks.emplace_back( |
| 1134 | Args: new Block, Args: llvm::to_vector(Range: entryEdge.getSuccessorOperands())); |
| 1135 | conditionalRegion.push_back(block: createdEmptyBlocks.back().first); |
| 1136 | continue; |
| 1137 | } |
| 1138 | |
| 1139 | createSingleExitBranchRegion(branchRegion, continuation, createdEmptyBlocks, |
| 1140 | conditionalRegion); |
| 1141 | |
| 1142 | // The entries of the branch regions may only have redundant block arguments |
| 1143 | // since the edge to the branch region is always dominating. |
| 1144 | Block *subRegionEntryBlock = &conditionalRegion.front(); |
| 1145 | for (auto &&[oldValue, newValue] : |
| 1146 | llvm::zip(t: subRegionEntryBlock->getArguments(), |
| 1147 | u: entryEdge.getSuccessorOperands())) |
| 1148 | oldValue.replaceAllUsesWith(newValue); |
| 1149 | |
| 1150 | subRegionEntryBlock->eraseArguments(start: 0, |
| 1151 | num: subRegionEntryBlock->getNumArguments()); |
| 1152 | newSubRegions.push_back(Elt: subRegionEntryBlock); |
| 1153 | } |
| 1154 | |
| 1155 | Operation *structuredCondOp; |
| 1156 | { |
| 1157 | auto opBuilder = OpBuilder::atBlockTerminator(block: regionEntry); |
| 1158 | FailureOr<Operation *> result = interface.createStructuredBranchRegionOp( |
| 1159 | builder&: opBuilder, controlFlowCondOp: regionEntry->getTerminator(), |
| 1160 | resultTypes: continuation->getArgumentTypes(), regions: conditionalRegions); |
| 1161 | if (failed(Result: result)) |
| 1162 | return failure(); |
| 1163 | structuredCondOp = *result; |
| 1164 | regionEntry->getTerminator()->erase(); |
| 1165 | } |
| 1166 | |
| 1167 | for (auto &&[block, valueRange] : createdEmptyBlocks) { |
| 1168 | auto builder = OpBuilder::atBlockEnd(block); |
| 1169 | LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( |
| 1170 | loc: structuredCondOp->getLoc(), builder, branchRegionOp: structuredCondOp, replacedControlFlowOp: nullptr, |
| 1171 | results: valueRange); |
| 1172 | if (failed(Result: result)) |
| 1173 | return failure(); |
| 1174 | } |
| 1175 | |
| 1176 | // Any leftover users of the continuation must be from unconditional branches |
| 1177 | // in a branch region. There can only be at most one per branch region as |
| 1178 | // all branch regions have been made single-entry single-exit above. |
| 1179 | // Replace them with the region terminator. |
| 1180 | for (Operation *user : llvm::make_early_inc_range(Range: continuation->getUsers())) { |
| 1181 | assert(user->getNumSuccessors() == 1); |
| 1182 | auto builder = OpBuilder::atBlockTerminator(block: user->getBlock()); |
| 1183 | LogicalResult result = interface.createStructuredBranchRegionTerminatorOp( |
| 1184 | loc: user->getLoc(), builder, branchRegionOp: structuredCondOp, replacedControlFlowOp: user, |
| 1185 | results: getMutableSuccessorOperands(block: user->getBlock(), successorIndex: 0).getAsOperandRange()); |
| 1186 | if (failed(Result: result)) |
| 1187 | return failure(); |
| 1188 | user->erase(); |
| 1189 | } |
| 1190 | |
| 1191 | for (auto &&[oldValue, newValue] : |
| 1192 | llvm::zip(t: continuation->getArguments(), u: structuredCondOp->getResults())) |
| 1193 | oldValue.replaceAllUsesWith(newValue); |
| 1194 | |
| 1195 | // Splice together the continuations operations with the region entry. |
| 1196 | regionEntry->getOperations().splice(where: regionEntry->end(), |
| 1197 | L2&: continuation->getOperations()); |
| 1198 | |
| 1199 | continuation->erase(); |
| 1200 | |
| 1201 | // After splicing the continuation, the region has to be reprocessed as it has |
| 1202 | // new successors. |
| 1203 | newSubRegions.push_back(Elt: regionEntry); |
| 1204 | |
| 1205 | return newSubRegions; |
| 1206 | } |
| 1207 | |
| 1208 | /// Transforms the region to only have a single block for every kind of |
| 1209 | /// return-like operation that all previous occurrences of the return-like op |
| 1210 | /// branch to. If the region only contains a single kind of return-like |
| 1211 | /// operation, it creates a single-entry and single-exit region. |
| 1212 | static ReturnLikeExitCombiner createSingleExitBlocksForReturnLike( |
| 1213 | Region ®ion, function_ref<Value(unsigned)> getSwitchValue, |
| 1214 | CFGToSCFInterface &interface) { |
| 1215 | ReturnLikeExitCombiner exitCombiner(region, interface); |
| 1216 | |
| 1217 | for (Block &block : region.getBlocks()) { |
| 1218 | if (block.getNumSuccessors() != 0) |
| 1219 | continue; |
| 1220 | exitCombiner.combineExit(returnLikeOp: block.getTerminator(), getSwitchValue); |
| 1221 | } |
| 1222 | |
| 1223 | return exitCombiner; |
| 1224 | } |
| 1225 | |
| 1226 | /// Checks all preconditions of the transformation prior to any transformations. |
| 1227 | /// Returns failure if any precondition is violated. |
| 1228 | static LogicalResult checkTransformationPreconditions(Region ®ion) { |
| 1229 | for (Block &block : region.getBlocks()) |
| 1230 | if (block.hasNoPredecessors() && !block.isEntryBlock()) |
| 1231 | return block.front().emitOpError( |
| 1232 | message: "transformation does not support unreachable blocks" ); |
| 1233 | |
| 1234 | WalkResult result = region.walk(callback: [](Operation *operation) { |
| 1235 | if (operation->getNumSuccessors() == 0) |
| 1236 | return WalkResult::advance(); |
| 1237 | |
| 1238 | // This transformation requires all ops with successors to implement the |
| 1239 | // branch op interface. It is impossible to adjust their block arguments |
| 1240 | // otherwise. |
| 1241 | auto branchOpInterface = dyn_cast<BranchOpInterface>(operation); |
| 1242 | if (!branchOpInterface) { |
| 1243 | operation->emitOpError(message: "transformation does not support terminators with " |
| 1244 | "successors not implementing BranchOpInterface" ); |
| 1245 | return WalkResult::interrupt(); |
| 1246 | } |
| 1247 | // Branch operations must have no side effects. Replacing them would not be |
| 1248 | // valid otherwise. |
| 1249 | if (!isMemoryEffectFree(branchOpInterface)) { |
| 1250 | branchOpInterface->emitOpError( |
| 1251 | "transformation does not support terminators with side effects" ); |
| 1252 | return WalkResult::interrupt(); |
| 1253 | } |
| 1254 | |
| 1255 | for (unsigned index : llvm::seq(Size: operation->getNumSuccessors())) { |
| 1256 | SuccessorOperands succOps = branchOpInterface.getSuccessorOperands(index); |
| 1257 | |
| 1258 | // We cannot support operations with operation-produced successor operands |
| 1259 | // as it is currently not possible to pass them to any block arguments |
| 1260 | // other than the first. This breaks creating multiplexer blocks and would |
| 1261 | // likely need special handling elsewhere too. |
| 1262 | if (succOps.getProducedOperandCount() == 0) |
| 1263 | continue; |
| 1264 | |
| 1265 | branchOpInterface->emitOpError("transformation does not support " |
| 1266 | "operations with operation-produced " |
| 1267 | "successor operands" ); |
| 1268 | return WalkResult::interrupt(); |
| 1269 | } |
| 1270 | return WalkResult::advance(); |
| 1271 | }); |
| 1272 | return failure(IsFailure: result.wasInterrupted()); |
| 1273 | } |
| 1274 | |
| 1275 | FailureOr<bool> mlir::transformCFGToSCF(Region ®ion, |
| 1276 | CFGToSCFInterface &interface, |
| 1277 | DominanceInfo &dominanceInfo) { |
| 1278 | if (region.empty() || region.hasOneBlock()) |
| 1279 | return false; |
| 1280 | |
| 1281 | if (failed(Result: checkTransformationPreconditions(region))) |
| 1282 | return failure(); |
| 1283 | |
| 1284 | DenseMap<Type, Value> typedUndefCache; |
| 1285 | auto getUndefValue = [&](Type type) { |
| 1286 | auto [iter, inserted] = typedUndefCache.try_emplace(Key: type); |
| 1287 | if (!inserted) |
| 1288 | return iter->second; |
| 1289 | |
| 1290 | auto constantBuilder = OpBuilder::atBlockBegin(block: ®ion.front()); |
| 1291 | |
| 1292 | iter->second = |
| 1293 | interface.getUndefValue(loc: region.getLoc(), builder&: constantBuilder, type); |
| 1294 | return iter->second; |
| 1295 | }; |
| 1296 | |
| 1297 | // The transformation only creates all values in the range of 0 to |
| 1298 | // max(#numSuccessors). Therefore using a vector instead of a map. |
| 1299 | SmallVector<Value> switchValueCache; |
| 1300 | auto getSwitchValue = [&](unsigned value) { |
| 1301 | if (value < switchValueCache.size()) |
| 1302 | if (switchValueCache[value]) |
| 1303 | return switchValueCache[value]; |
| 1304 | |
| 1305 | auto constantBuilder = OpBuilder::atBlockBegin(block: ®ion.front()); |
| 1306 | |
| 1307 | switchValueCache.resize( |
| 1308 | N: std::max<size_t>(a: switchValueCache.size(), b: value + 1)); |
| 1309 | |
| 1310 | switchValueCache[value] = |
| 1311 | interface.getCFGSwitchValue(loc: region.getLoc(), builder&: constantBuilder, value); |
| 1312 | return switchValueCache[value]; |
| 1313 | }; |
| 1314 | |
| 1315 | ReturnLikeExitCombiner exitCombiner = |
| 1316 | createSingleExitBlocksForReturnLike(region, getSwitchValue, interface); |
| 1317 | |
| 1318 | // Invalidate any dominance tree on the region as the exit combiner has |
| 1319 | // added new blocks and edges. |
| 1320 | dominanceInfo.invalidate(region: ®ion); |
| 1321 | |
| 1322 | SmallVector<Block *> workList = {®ion.front()}; |
| 1323 | while (!workList.empty()) { |
| 1324 | Block *current = workList.pop_back_val(); |
| 1325 | |
| 1326 | // Turn all top-level cycles in the CFG to structured control flow first. |
| 1327 | // After this transformation, the remaining CFG ops form a DAG. |
| 1328 | FailureOr<SmallVector<Block *>> newRegions = |
| 1329 | transformCyclesToSCFLoops(regionEntry: current, getSwitchValue, getUndefValue, |
| 1330 | interface, dominanceInfo, exitCombiner); |
| 1331 | if (failed(Result: newRegions)) |
| 1332 | return failure(); |
| 1333 | |
| 1334 | // Add the newly created subregions to the worklist. These are the |
| 1335 | // bodies of the loops. |
| 1336 | llvm::append_range(C&: workList, R&: *newRegions); |
| 1337 | // Invalidate the dominance tree as blocks have been moved, created and |
| 1338 | // added during the cycle to structured loop transformation. |
| 1339 | if (!newRegions->empty()) |
| 1340 | dominanceInfo.invalidate(region: current->getParent()); |
| 1341 | |
| 1342 | newRegions = transformToStructuredCFBranches( |
| 1343 | regionEntry: current, getSwitchValue, getUndefValue, interface, dominanceInfo); |
| 1344 | if (failed(Result: newRegions)) |
| 1345 | return failure(); |
| 1346 | // Invalidating the dominance tree is generally not required by the |
| 1347 | // transformation above as the new region entries correspond to unaffected |
| 1348 | // subtrees in the dominator tree. Only its parent nodes have changed but |
| 1349 | // won't be visited again. |
| 1350 | llvm::append_range(C&: workList, R&: *newRegions); |
| 1351 | } |
| 1352 | |
| 1353 | return true; |
| 1354 | } |
| 1355 | |