//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct alloc and dealloc positions.
// Furthermore, buffer deallocation also adds the clone operations that are
// required to ensure that all buffers are deallocated. The main class is the
// BufferDeallocationPass class that implements the underlying algorithm. In
// order to put allocations and deallocations at safe positions, it is essential
// to place them in the correct blocks. However, the liveness analysis does not
// pay attention to aliases, which can occur due to branches (and their
// associated block arguments) in general. For this purpose, BufferDeallocation
// first finds all possible aliases of a single value (using the
// BufferViewFlowAnalysis class). Consider the following example:
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   cf.br ^exit(%arg0)
// ^bb2:
//   %new_value = ...
//   cf.br ^exit(%new_value)
// ^exit(%arg1):
//   return %arg1;
//
// We should place the dealloc for %new_value in exit. However, %arg1 in exit
// may alias either %arg0 or %new_value, so the buffer cannot simply be freed
// in that post dominator. Instead, %new_value has to be freed in its own
// block, and a new clone buffer that contains the actual contents has to be
// passed to %arg1 so that exit always receives a buffer it is allowed to free.
// Using the BufferViewFlowAnalysis class, we will find out that %new_value has
// a potential alias %arg1. In order to find the dealloc position we have to
// find all potential aliases, iterate over their uses and find the common
// post-dominator block (note that additional clones and buffers remove
// potential aliases and will influence the placement of the deallocs). In all
// cases, the computed block can be safely used to free the %new_value buffer
// (it may be exit or bb2) as the value dies there, and we can use liveness
// information to determine the exact operation after which we have to insert
// the dealloc. The algorithm introduces clone buffers and places deallocs in
// safe locations to ensure that all buffers will be freed in the end.
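//
// A sketch of the expected result for the example above (assuming the buffer
// bound to %arg1 is consumed in exit rather than returned, since returning
// buffers is not supported, see the TODO below, and assuming the default
// bufferization.clone / dealloc builders):
//
// ^bb0(%arg0):
//   cf.cond_br %cond, ^bb1, ^bb2
// ^bb1:
//   %clone0 = bufferization.clone %arg0
//   cf.br ^exit(%clone0)
// ^bb2:
//   %new_value = ...
//   %clone1 = bufferization.clone %new_value
//   dealloc %new_value
//   cf.br ^exit(%clone1)
// ^exit(%arg1):
//   ... last use of %arg1 ...
//   dealloc %arg1
//   return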
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported. Furthermore, it
// does not accept functions that already return buffers.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Bufferization/Transforms/Passes.h"

#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "llvm/ADT/SetOperations.h"

namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_BUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir

using namespace mlir;
using namespace mlir::bufferization;

/// Walks over all immediate return-like terminators in the given region.
static LogicalResult walkReturnOperations(
    Region *region,
    llvm::function_ref<LogicalResult(RegionBranchTerminatorOpInterface)> func) {
  for (Block &block : *region) {
    Operation *terminator = block.getTerminator();
    // Skip non region-return-like terminators.
    if (auto regionTerminator =
            dyn_cast<RegionBranchTerminatorOpInterface>(terminator)) {
      if (failed(func(regionTerminator)))
        return failure();
    }
  }
  return success();
}

/// Checks if all operations that have at least one attached region implement
/// the RegionBranchOpInterface. This is not required in edge cases, where we
/// have a single attached region and the parent operation has no results.
static bool validateSupportedControlFlow(Operation *op) {
  WalkResult result = op->walk([&](Operation *operation) {
    // Only check ops that are inside a function.
    if (!operation->getParentOfType<func::FuncOp>())
      return WalkResult::advance();

    auto regions = operation->getRegions();
    // Walk over all operations in a region and check if the operation has at
    // least one region and implements the RegionBranchOpInterface. If there
    // is an operation that does not fulfill this condition, we cannot apply
    // the deallocation steps. Furthermore, we accept cases, where we have a
    // region that returns no results, since, in that case, the intra-region
    // control flow does not affect the transformation.
    size_t size = regions.size();
    if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
        !dyn_cast<RegionBranchOpInterface>(operation)) {
      operation->emitError("All operations with attached regions need to "
                           "implement the RegionBranchOpInterface.");
      return WalkResult::interrupt();
    }

    return WalkResult::advance();
  });
  return !result.wasInterrupted();
}

namespace {

//===----------------------------------------------------------------------===//
// Backedges analysis
//===----------------------------------------------------------------------===//

/// A straight-forward program analysis which detects loop backedges induced by
/// explicit control flow.
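///
/// For example, given the explicit control flow
///
///   ^bb1:
///     cf.br ^bb2
///   ^bb2:
///     cf.cond_br %cond, ^bb1, ^bb3
///
/// the pair (^bb2, ^bb1) is recorded as a backedge (source, target).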
class Backedges {
public:
  using BlockSetT = SmallPtrSet<Block *, 16>;
  using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;

public:
  /// Constructs a new backedges analysis using the op provided.
  Backedges(Operation *op) { recurse(op); }

  /// Returns the number of backedges formed by explicit control flow.
  size_t size() const { return edgeSet.size(); }

  /// Returns the start iterator to loop over all backedges.
  BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }

  /// Returns the end iterator to loop over all backedges.
  BackedgeSetT::const_iterator end() const { return edgeSet.end(); }

private:
  /// Enters the current block and inserts a backedge into the `edgeSet` if we
  /// have already visited the current block. The inserted edge links the given
  /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }

  /// Stores all blocks that are currently visited and on the processing stack.
  BlockSetT visited;

  /// Stores all backedges in the format (source, target).
  BackedgeSetT edgeSet;
};

//===----------------------------------------------------------------------===//
// BufferDeallocation
//===----------------------------------------------------------------------===//

/// The buffer deallocation transformation which ensures that all allocs in the
/// program have a corresponding de-allocation. As a side-effect, it might also
/// introduce clones that in turn lead to additional deallocations.
class BufferDeallocation : public BufferPlacementTransformationBase {
public:
  using AliasAllocationMapT =
      llvm::DenseMap<Value, bufferization::AllocationOpInterface>;

  BufferDeallocation(Operation *op)
      : BufferPlacementTransformationBase(op), dominators(op),
        postDominators(op) {}

  /// Checks if all allocation operations either provide an already existing
  /// deallocation operation or implement the AllocationOpInterface. In
  /// addition, this method initializes the internal alias to
  /// AllocationOpInterface mapping in order to get compatible
  /// AllocationOpInterface implementations for aliases.
  LogicalResult prepare() {
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      // Get the defining allocation operation.
      Value alloc = std::get<0>(entry);
      auto allocationInterface =
          alloc.getDefiningOp<bufferization::AllocationOpInterface>();
      // If there is no existing deallocation operation and no implementation
      // of the AllocationOpInterface, we cannot apply the BufferDeallocation
      // pass.
      if (!std::get<1>(entry) && !allocationInterface) {
        return alloc.getDefiningOp()->emitError(
            "Allocation is not deallocated explicitly nor does the operation "
            "implement the AllocationOpInterface.");
      }

      // Register the current allocation interface implementation.
      aliasToAllocations[alloc] = allocationInterface;

      // Get the alias information for the current allocation node.
      for (Value alias : aliases.resolve(alloc)) {
        // TODO: check for incompatible implementations of the
        // AllocationOpInterface. This could be realized by promoting the
        // AllocationOpInterface to a DialectInterface.
        aliasToAllocations[alias] = allocationInterface;
      }
    }
    return success();
  }

  /// Performs the actual placement/creation of all temporary clone and dealloc
  /// nodes.
  LogicalResult deallocate() {
    // Add additional clones that are required.
    if (failed(introduceClones()))
      return failure();

    // Place deallocations for all allocation entries.
    return placeDeallocs();
  }

private:
  /// Introduces required clone operations to avoid memory leaks.
  LogicalResult introduceClones() {
    // Initialize the set of values that require a dedicated memory free
    // operation since their operands cannot be safely deallocated in a post
    // dominator.
    SetVector<Value> valuesToFree;
    llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
    SmallVector<std::tuple<Value, Block *>, 8> toProcess;

    // Check dominance relation for proper dominance properties. If the given
    // value node does not dominate an alias, we will have to create a clone in
    // order to free all buffers that can potentially leak into a post
    // dominator.
    auto findUnsafeValues = [&](Value source, Block *definingBlock) {
      auto it = aliases.find(source);
      if (it == aliases.end())
        return;
      for (Value value : it->second) {
        if (valuesToFree.count(value) > 0)
          continue;
        Block *parentBlock = value.getParentBlock();
        // Check whether we have to free this particular block argument or
        // generic value. We have to free the current alias if it is either
        // defined in a non-dominated block or it is defined in the same block
        // but the current value is not dominated by the source value.
        if (!dominators.dominates(definingBlock, parentBlock) ||
            (definingBlock == parentBlock && isa<BlockArgument>(value))) {
          toProcess.emplace_back(value, parentBlock);
          valuesToFree.insert(value);
        } else if (visitedValues.insert(std::make_tuple(value, definingBlock))
                       .second)
          toProcess.emplace_back(value, definingBlock);
      }
    };

    // Detect possibly unsafe aliases starting from all allocations.
    for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value allocValue = std::get<0>(entry);
      findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
    }
    // Try to find block arguments that require an explicit free operation
    // until we reach a fix point.
    while (!toProcess.empty()) {
      auto current = toProcess.pop_back_val();
      findUnsafeValues(std::get<0>(current), std::get<1>(current));
    }

    // Update buffer aliases to ensure that we free all buffers and block
    // arguments at the correct locations.
    aliases.remove(valuesToFree);

    // Add new allocs and additional clone operations.
    for (Value value : valuesToFree) {
      if (failed(isa<BlockArgument>(value)
                     ? introduceBlockArgCopy(cast<BlockArgument>(value))
                     : introduceValueCopyForRegionResult(value)))
        return failure();

      // Register the value to require a final dealloc. Note that we do not
      // have to assign a block here since we do not want to move the
      // allocation node to another location.
      allocs.registerAlloc(std::make_tuple(value, nullptr));
    }
    return success();
  }

  /// Introduces temporary clones in all predecessors and copies the source
  /// values into the newly allocated buffers.
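  ///
  /// For the running example from the file header, the predecessor edge
  ///
  ///   ^bb2:
  ///     cf.br ^exit(%new_value)
  ///
  /// is rewritten (roughly, assuming the default clone builder) to
  ///
  ///   ^bb2:
  ///     %clone = bufferization.clone %new_value
  ///     cf.br ^exit(%clone)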
  LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
    // Allocate a buffer for the current block argument in the block of
    // the associated value (which will be a predecessor block by
    // definition).
    Block *block = blockArg.getOwner();
    for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
      // Get the terminator and the value that will be passed to our
      // argument.
      Operation *terminator = (*it)->getTerminator();
      auto branchInterface = cast<BranchOpInterface>(terminator);
      SuccessorOperands operands =
          branchInterface.getSuccessorOperands(it.getSuccessorIndex());

      // Query the associated source value.
      Value sourceValue = operands[blockArg.getArgNumber()];
      if (!sourceValue) {
        return failure();
      }
      // Wire new clone and successor operand.
      // Create a new clone at the current location of the terminator.
      auto clone = introduceCloneBuffers(sourceValue, terminator);
      if (failed(clone))
        return failure();
      operands.slice(blockArg.getArgNumber(), 1).assign(*clone);
    }

    // Check whether the block argument has implicitly defined predecessors via
    // the RegionBranchOpInterface. This can be the case if the current block
    // argument belongs to the first block in a region and the parent operation
    // implements the RegionBranchOpInterface.
    Region *argRegion = block->getParent();
    Operation *parentOp = argRegion->getParentOp();
    RegionBranchOpInterface regionInterface;
    if (&argRegion->front() != block ||
        !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
      return success();

    if (failed(introduceClonesForRegionSuccessors(
            regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
            [&](RegionSuccessor &successorRegion) {
              // Find a predecessor of our argRegion.
              return successorRegion.getSuccessor() == argRegion;
            })))
      return failure();

    // Check whether the block argument belongs to an entry region of the
    // parent operation. In this case, we have to introduce an additional clone
    // for the buffer that is passed to the argument.
    SmallVector<RegionSuccessor, 2> successorRegions;
    regionInterface.getSuccessorRegions(/*point=*/RegionBranchPoint::parent(),
                                        successorRegions);
    auto *it =
        llvm::find_if(successorRegions, [&](RegionSuccessor &successorRegion) {
          return successorRegion.getSuccessor() == argRegion;
        });
    if (it == successorRegions.end())
      return success();

    // Determine the actual operand to introduce a clone for and rewire the
    // operand to point to the clone instead.
    auto operands = regionInterface.getEntrySuccessorOperands(argRegion);
    size_t operandIndex =
        llvm::find(it->getSuccessorInputs(), blockArg).getIndex() +
        operands.getBeginOperandIndex();
    Value operand = parentOp->getOperand(operandIndex);
    assert(operand ==
               operands[operandIndex - operands.getBeginOperandIndex()] &&
           "region interface operands don't match parentOp operands");
    auto clone = introduceCloneBuffers(operand, parentOp);
    if (failed(clone))
      return failure();

    parentOp->setOperand(operandIndex, *clone);
    return success();
  }

  /// Introduces temporary clones in front of all associated nested-region
  /// terminators and copies the source values into the newly allocated
  /// buffers.
  LogicalResult introduceValueCopyForRegionResult(Value value) {
    // Get the actual result index in the scope of the parent terminator.
    Operation *operation = value.getDefiningOp();
    auto regionInterface = cast<RegionBranchOpInterface>(operation);
    // Filter successors that return to the parent operation.
    auto regionPredicate = [&](RegionSuccessor &successorRegion) {
      // If the RegionSuccessor has no associated successor, it will return to
      // its parent operation.
      return !successorRegion.getSuccessor();
    };
    // Introduce a clone for all region "results" that are returned to the
    // parent operation. This is required since the parent's result value has
    // been considered critical. Therefore, the algorithm assumes that a clone
    // of a previously allocated buffer is returned by the operation (like in
    // the case of a block argument).
    return introduceClonesForRegionSuccessors(
        regionInterface, operation->getRegions(), value, regionPredicate);
  }

  /// Introduces buffer clones for all terminators in the given regions. The
  /// regionPredicate is applied to every successor region in order to restrict
  /// the clones to specific regions.
  template <typename TPredicate>
  LogicalResult introduceClonesForRegionSuccessors(
      RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
      Value argValue, const TPredicate &regionPredicate) {
    for (Region &region : regions) {
      // Query the regionInterface to get all successor regions of the current
      // one.
      SmallVector<RegionSuccessor, 2> successorRegions;
      regionInterface.getSuccessorRegions(region, successorRegions);
      // Try to find a matching region successor.
      RegionSuccessor *regionSuccessor =
          llvm::find_if(successorRegions, regionPredicate);
      if (regionSuccessor == successorRegions.end())
        continue;
      // Get the operand index in the context of the current successor input
      // bindings.
      size_t operandIndex =
          llvm::find(regionSuccessor->getSuccessorInputs(), argValue)
              .getIndex();

      // Iterate over all immediate terminator operations to introduce
      // new buffer allocations. Thereby, the appropriate terminator operand
      // will be adjusted to point to the newly allocated buffer instead.
      if (failed(walkReturnOperations(
              &region, [&](RegionBranchTerminatorOpInterface terminator) {
                // Get the actual mutable operands for this terminator op.
                auto terminatorOperands =
                    terminator.getMutableSuccessorOperands(*regionSuccessor);
                // Extract the source value from the current terminator.
                // This conversion needs to exist on a separate line due to a
                // bug in GCC conversion analysis.
                OperandRange immutableTerminatorOperands = terminatorOperands;
                Value sourceValue = immutableTerminatorOperands[operandIndex];
                // Create a new clone at the current location of the
                // terminator.
                auto clone = introduceCloneBuffers(sourceValue, terminator);
                if (failed(clone))
                  return failure();
                // Wire clone and terminator operand.
                terminatorOperands.slice(operandIndex, 1).assign(*clone);
                return success();
              })))
        return failure();
    }
    return success();
  }

  /// Creates a new memory allocation for the given source value and clones
  /// its content into the newly allocated buffer. The terminator operation is
  /// used to insert the clone operation at the right place.
  FailureOr<Value> introduceCloneBuffers(Value sourceValue,
                                         Operation *terminator) {
    // Avoid multiple clones of the same source value. This can happen in the
    // presence of loops when a branch acts as a backedge while also having
    // another successor that returns to its parent operation. Note that
    // copying copied buffers can introduce memory leaks since the invariant of
    // BufferDeallocation assumes that a buffer will be only cloned once into a
    // temporary buffer. Hence, the construction of clone chains introduces
    // additional allocations that are not tracked automatically by the
    // algorithm.
    if (clonedValues.contains(sourceValue))
      return sourceValue;
    // Create a new clone operation that copies the contents of the old
    // buffer to the new one.
    auto clone = buildClone(terminator, sourceValue);
    if (succeeded(clone)) {
      // Remember the clone of original source value.
      clonedValues.insert(*clone);
    }
    return clone;
  }

  /// Finds correct dealloc positions according to the algorithm described at
  /// the top of the file for all alloc nodes and block arguments that can be
  /// handled by this analysis.
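  ///
  /// For a cloned block argument such as %arg1 in the file-header example, the
  /// placement block is its own block ^exit, and the dealloc ends up right
  /// after the last use of %arg1 (sketch):
  ///
  ///   ^exit(%arg1):
  ///     ... last use of %arg1 ...
  ///     dealloc %arg1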
  LogicalResult placeDeallocs() {
    // Move or insert deallocs using the previously computed information.
    // These deallocations will be linked to their associated allocation nodes
    // since they don't have any aliases that can (potentially) increase their
    // liveness.
    for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
      Value alloc = std::get<0>(entry);
      auto aliasesSet = aliases.resolve(alloc);
      assert(!aliasesSet.empty() && "must contain at least one alias");

      // Determine the actual block to place the dealloc and get liveness
      // information.
      Block *placementBlock =
          findCommonDominator(alloc, aliasesSet, postDominators);
      const LivenessBlockInfo *livenessInfo =
          liveness.getLiveness(placementBlock);

      // We have to ensure that the dealloc will be after the last use of all
      // aliases of the given value. We first assume that there are no uses in
      // the placementBlock and that we can safely place the dealloc at the
      // beginning.
      Operation *endOperation = &placementBlock->front();

      // Iterate over all aliases and ensure that the endOperation will point
      // to the last operation of all potential aliases in the placementBlock.
      for (Value alias : aliasesSet) {
        // Ensure that the start operation is at least the defining operation
        // of the current alias to avoid invalid placement of deallocs for
        // aliases without any uses.
        Operation *beforeOp = endOperation;
        if (alias.getDefiningOp() &&
            !(beforeOp = placementBlock->findAncestorOpInBlock(
                  *alias.getDefiningOp())))
          continue;

        Operation *aliasEndOperation =
            livenessInfo->getEndOperation(alias, beforeOp);
        // Check whether the aliasEndOperation lies in the desired block and
        // whether it is behind the current endOperation. If yes, this will be
        // the new endOperation.
        if (aliasEndOperation->getBlock() == placementBlock &&
            endOperation->isBeforeInBlock(aliasEndOperation))
          endOperation = aliasEndOperation;
      }
      // endOperation is the last operation behind which we can safely store
      // the dealloc taking all potential aliases into account.

      // If there is an existing dealloc, move it to the right place.
      Operation *deallocOperation = std::get<1>(entry);
      if (deallocOperation) {
        deallocOperation->moveAfter(endOperation);
      } else {
        // If the dealloc would have to be inserted after the block terminator
        // (i.e. there is no next operation), the value escapes the block and
        // no dealloc is inserted.
        Operation *nextOp = endOperation->getNextNode();
        if (!nextOp)
          continue;
        // If there is no dealloc node, insert one in the right place.
        if (failed(buildDealloc(nextOp, alloc)))
          return failure();
      }
    }
    return success();
  }

  /// Builds a deallocation operation compatible with the given allocation
  /// value. If there is no registered AllocationOpInterface implementation for
  /// the given value (e.g. in the case of a function parameter), this method
  /// builds a memref::DeallocOp.
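  ///
  /// In the fallback case this amounts to emitting, right before `op` (sketch,
  /// with a hypothetical %buf):
  ///
  ///   memref.dealloc %buf : memref<...>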
  LogicalResult buildDealloc(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible deallocation operation.
      auto dealloc = it->second.buildDealloc(builder, alloc);
      if (!dealloc)
        return op->emitError()
               << "allocations without compatible deallocations are "
                  "not supported";
    } else {
      // Build a "default" DeallocOp for unknown allocation sources.
      builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
    }
    return success();
  }

  /// Builds a clone operation compatible with the given allocation value. If
  /// there is no registered AllocationOpInterface implementation for the given
  /// value (e.g. in the case of a function parameter), this method builds a
  /// bufferization::CloneOp.
  FailureOr<Value> buildClone(Operation *op, Value alloc) {
    OpBuilder builder(op);
    auto it = aliasToAllocations.find(alloc);
    if (it != aliasToAllocations.end()) {
      // Call the allocation op interface to build a supported and
      // compatible clone operation.
      auto clone = it->second.buildClone(builder, alloc);
      if (clone)
        return *clone;
      return (LogicalResult)(op->emitError()
                             << "allocations without compatible clone ops "
                                "are not supported");
    }
    // Build a "default" CloneOp for unknown allocation sources.
    return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
        .getResult();
  }

  /// The dominator info to find the appropriate start operation to move the
  /// allocs.
  DominanceInfo dominators;

  /// The post dominator info to move the dependent allocs in the right
  /// position.
  PostDominanceInfo postDominators;

  /// Stores already cloned buffers to avoid additional clones of clones.
  ValueSetT clonedValues;

  /// Maps aliases to their source allocation interfaces (inverse mapping).
  AliasAllocationMapT aliasToAllocations;
};

//===----------------------------------------------------------------------===//
// BufferDeallocationPass
//===----------------------------------------------------------------------===//

/// The actual buffer deallocation pass that inserts and moves dealloc nodes
/// into the right positions. Furthermore, it inserts additional clones if
/// necessary. It uses the algorithm described at the top of the file.
struct BufferDeallocationPass
    : public bufferization::impl::BufferDeallocationBase<
          BufferDeallocationPass> {
  void getDependentDialects(DialectRegistry &registry) const override {
    registry.insert<bufferization::BufferizationDialect>();
    registry.insert<memref::MemRefDialect>();
  }

  void runOnOperation() override {
    func::FuncOp func = getOperation();
    if (func.isExternal())
      return;

    if (failed(deallocateBuffers(func)))
      signalPassFailure();
  }
};

} // namespace

LogicalResult bufferization::deallocateBuffers(Operation *op) {
  if (isa<ModuleOp>(op)) {
    WalkResult result = op->walk([&](func::FuncOp funcOp) {
      if (failed(deallocateBuffers(funcOp)))
        return WalkResult::interrupt();
      return WalkResult::advance();
    });
    return success(!result.wasInterrupted());
  }

  // Ensure that there are supported loops only.
  Backedges backedges(op);
  if (backedges.size()) {
    op->emitError("Only structured control-flow loops are supported.");
    return failure();
  }

  // Check that the control flow structures are supported.
  if (!validateSupportedControlFlow(op))
    return failure();

  // Gather all required allocation nodes and prepare the deallocation phase.
  BufferDeallocation deallocation(op);

  // Check for supported AllocationOpInterface implementations and prepare the
  // internal deallocation pass.
  if (failed(deallocation.prepare()))
    return failure();

  // Place all required temporary clone and dealloc nodes.
  if (failed(deallocation.deallocate()))
    return failure();

  return success();
}

//===----------------------------------------------------------------------===//
// BufferDeallocationPass construction
//===----------------------------------------------------------------------===//

std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
  return std::make_unique<BufferDeallocationPass>();
}