1//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements logic for computing correct alloc and dealloc positions.
10// Furthermore, buffer deallocation also adds required new clone operations to
11// ensure that all buffers are deallocated. The main class is the
12// BufferDeallocationPass class that implements the underlying algorithm. In
13// order to put allocations and deallocations at safe positions, it is
14// significantly important to put them into the correct blocks. However, the
15// liveness analysis does not pay attention to aliases, which can occur due to
16// branches (and their associated block arguments) in general. For this purpose,
17// BufferDeallocation firstly finds all possible aliases for a single value
18// (using the BufferViewFlowAnalysis class). Consider the following example:
19//
20// ^bb0(%arg0):
21// cf.cond_br %cond, ^bb1, ^bb2
22// ^bb1:
23// cf.br ^exit(%arg0)
24// ^bb2:
25// %new_value = ...
26// cf.br ^exit(%new_value)
27// ^exit(%arg1):
28// return %arg1;
29//
30// We should place the dealloc for %new_value in exit. However, we have to free
31// the buffer in the same block, because it cannot be freed in the post
32// dominator. However, this requires a new clone buffer for %arg1 that will
33// contain the actual contents. Using the class BufferViewFlowAnalysis, we
34// will find out that %new_value has a potential alias %arg1. In order to find
35// the dealloc position we have to find all potential aliases, iterate over
36// their uses and find the common post-dominator block (note that additional
37// clones and buffers remove potential aliases and will influence the placement
38// of the deallocs). In all cases, the computed block can be safely used to free
39// the %new_value buffer (may be exit or bb2) as it will die and we can use
40// liveness information to determine the exact operation after which we have to
41// insert the dealloc. However, the algorithm supports introducing clone buffers
42// and placing deallocs in safe locations to ensure that all buffers will be
43// freed in the end.
44//
45// TODO:
46// The current implementation does not support explicit-control-flow loops and
47// the resulting code will be invalid with respect to program semantics.
48// However, structured control-flow loops are fully supported. Furthermore, it
49// doesn't accept functions which return buffers already.
50//
51//===----------------------------------------------------------------------===//
52
53#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
54
55#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
56#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
57#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
58#include "mlir/Dialect/Func/IR/FuncOps.h"
59#include "mlir/Dialect/MemRef/IR/MemRef.h"
60#include "llvm/ADT/SetOperations.h"
61
62namespace mlir {
63namespace bufferization {
64#define GEN_PASS_DEF_BUFFERDEALLOCATION
65#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
66} // namespace bufferization
67} // namespace mlir
68
69using namespace mlir;
70using namespace mlir::bufferization;
71
72/// Walks over all immediate return-like terminators in the given region.
73static LogicalResult walkReturnOperations(
74 Region *region,
75 llvm::function_ref<LogicalResult(RegionBranchTerminatorOpInterface)> func) {
76 for (Block &block : *region) {
77 Operation *terminator = block.getTerminator();
78 // Skip non region-return-like terminators.
79 if (auto regionTerminator =
80 dyn_cast<RegionBranchTerminatorOpInterface>(terminator)) {
81 if (failed(func(regionTerminator)))
82 return failure();
83 }
84 }
85 return success();
86}
87
88/// Checks if all operations that have at least one attached region implement
89/// the RegionBranchOpInterface. This is not required in edge cases, where we
90/// have a single attached region and the parent operation has no results.
91static bool validateSupportedControlFlow(Operation *op) {
92 WalkResult result = op->walk(callback: [&](Operation *operation) {
93 // Only check ops that are inside a function.
94 if (!operation->getParentOfType<func::FuncOp>())
95 return WalkResult::advance();
96
97 auto regions = operation->getRegions();
98 // Walk over all operations in a region and check if the operation has at
99 // least one region and implements the RegionBranchOpInterface. If there
100 // is an operation that does not fulfill this condition, we cannot apply
101 // the deallocation steps. Furthermore, we accept cases, where we have a
102 // region that returns no results, since, in that case, the intra-region
103 // control flow does not affect the transformation.
104 size_t size = regions.size();
105 if (((size == 1 && !operation->getResults().empty()) || size > 1) &&
106 !dyn_cast<RegionBranchOpInterface>(operation)) {
107 operation->emitError(message: "All operations with attached regions need to "
108 "implement the RegionBranchOpInterface.");
109 }
110
111 return WalkResult::advance();
112 });
113 return !result.wasSkipped();
114}
115
116namespace {
117
118//===----------------------------------------------------------------------===//
119// Backedges analysis
120//===----------------------------------------------------------------------===//
121
122/// A straight-forward program analysis which detects loop backedges induced by
123/// explicit control flow.
124class Backedges {
125public:
126 using BlockSetT = SmallPtrSet<Block *, 16>;
127 using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
128
129public:
130 /// Constructs a new backedges analysis using the op provided.
131 Backedges(Operation *op) { recurse(op); }
132
133 /// Returns the number of backedges formed by explicit control flow.
134 size_t size() const { return edgeSet.size(); }
135
136 /// Returns the start iterator to loop over all backedges.
137 BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
138
139 /// Returns the end iterator to loop over all backedges.
140 BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
141
142private:
143 /// Enters the current block and inserts a backedge into the `edgeSet` if we
144 /// have already visited the current block. The inserted edge links the given
145 /// `predecessor` with the `current` block.
146 bool enter(Block &current, Block *predecessor) {
147 bool inserted = visited.insert(Ptr: &current).second;
148 if (!inserted)
149 edgeSet.insert(V: std::make_pair(x&: predecessor, y: &current));
150 return inserted;
151 }
152
153 /// Leaves the current block.
154 void exit(Block &current) { visited.erase(Ptr: &current); }
155
156 /// Recurses into the given operation while taking all attached regions into
157 /// account.
158 void recurse(Operation *op) {
159 Block *current = op->getBlock();
160 // If the current op implements the `BranchOpInterface`, there can be
161 // cycles in the scope of all successor blocks.
162 if (isa<BranchOpInterface>(Val: op)) {
163 for (Block *succ : current->getSuccessors())
164 recurse(block&: *succ, predecessor: current);
165 }
166 // Recurse into all distinct regions and check for explicit control-flow
167 // loops.
168 for (Region &region : op->getRegions()) {
169 if (!region.empty())
170 recurse(block&: region.front(), predecessor: current);
171 }
172 }
173
174 /// Recurses into explicit control-flow structures that are given by
175 /// the successor relation defined on the block level.
176 void recurse(Block &block, Block *predecessor) {
177 // Try to enter the current block. If this is not possible, we are
178 // currently processing this block and can safely return here.
179 if (!enter(current&: block, predecessor))
180 return;
181
182 // Recurse into all operations and successor blocks.
183 for (Operation &op : block.getOperations())
184 recurse(op: &op);
185
186 // Leave the current block.
187 exit(current&: block);
188 }
189
190 /// Stores all blocks that are currently visited and on the processing stack.
191 BlockSetT visited;
192
193 /// Stores all backedges in the format (source, target).
194 BackedgeSetT edgeSet;
195};
196
197//===----------------------------------------------------------------------===//
198// BufferDeallocation
199//===----------------------------------------------------------------------===//
200
201/// The buffer deallocation transformation which ensures that all allocs in the
202/// program have a corresponding de-allocation. As a side-effect, it might also
203/// introduce clones that in turn leads to additional deallocations.
204class BufferDeallocation : public BufferPlacementTransformationBase {
205public:
206 using AliasAllocationMapT =
207 llvm::DenseMap<Value, bufferization::AllocationOpInterface>;
208
209 BufferDeallocation(Operation *op)
210 : BufferPlacementTransformationBase(op), dominators(op),
211 postDominators(op) {}
212
213 /// Checks if all allocation operations either provide an already existing
214 /// deallocation operation or implement the AllocationOpInterface. In
215 /// addition, this method initializes the internal alias to
216 /// AllocationOpInterface mapping in order to get compatible
217 /// AllocationOpInterface implementations for aliases.
218 LogicalResult prepare() {
219 for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
220 // Get the defining allocation operation.
221 Value alloc = std::get<0>(t: entry);
222 auto allocationInterface =
223 alloc.getDefiningOp<bufferization::AllocationOpInterface>();
224 // If there is no existing deallocation operation and no implementation of
225 // the AllocationOpInterface, we cannot apply the BufferDeallocation pass.
226 if (!std::get<1>(t: entry) && !allocationInterface) {
227 return alloc.getDefiningOp()->emitError(
228 message: "Allocation is not deallocated explicitly nor does the operation "
229 "implement the AllocationOpInterface.");
230 }
231
232 // Register the current allocation interface implementation.
233 aliasToAllocations[alloc] = allocationInterface;
234
235 // Get the alias information for the current allocation node.
236 for (Value alias : aliases.resolve(value: alloc)) {
237 // TODO: check for incompatible implementations of the
238 // AllocationOpInterface. This could be realized by promoting the
239 // AllocationOpInterface to a DialectInterface.
240 aliasToAllocations[alias] = allocationInterface;
241 }
242 }
243 return success();
244 }
245
246 /// Performs the actual placement/creation of all temporary clone and dealloc
247 /// nodes.
248 LogicalResult deallocate() {
249 // Add additional clones that are required.
250 if (failed(result: introduceClones()))
251 return failure();
252
253 // Place deallocations for all allocation entries.
254 return placeDeallocs();
255 }
256
257private:
258 /// Introduces required clone operations to avoid memory leaks.
259 LogicalResult introduceClones() {
260 // Initialize the set of values that require a dedicated memory free
261 // operation since their operands cannot be safely deallocated in a post
262 // dominator.
263 SetVector<Value> valuesToFree;
264 llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
265 SmallVector<std::tuple<Value, Block *>, 8> toProcess;
266
267 // Check dominance relation for proper dominance properties. If the given
268 // value node does not dominate an alias, we will have to create a clone in
269 // order to free all buffers that can potentially leak into a post
270 // dominator.
271 auto findUnsafeValues = [&](Value source, Block *definingBlock) {
272 auto it = aliases.find(value: source);
273 if (it == aliases.end())
274 return;
275 for (Value value : it->second) {
276 if (valuesToFree.count(key: value) > 0)
277 continue;
278 Block *parentBlock = value.getParentBlock();
279 // Check whether we have to free this particular block argument or
280 // generic value. We have to free the current alias if it is either
281 // defined in a non-dominated block or it is defined in the same block
282 // but the current value is not dominated by the source value.
283 if (!dominators.dominates(a: definingBlock, b: parentBlock) ||
284 (definingBlock == parentBlock && isa<BlockArgument>(Val: value))) {
285 toProcess.emplace_back(Args&: value, Args&: parentBlock);
286 valuesToFree.insert(X: value);
287 } else if (visitedValues.insert(V: std::make_tuple(args&: value, args&: definingBlock))
288 .second)
289 toProcess.emplace_back(Args&: value, Args&: definingBlock);
290 }
291 };
292
293 // Detect possibly unsafe aliases starting from all allocations.
294 for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
295 Value allocValue = std::get<0>(t&: entry);
296 findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
297 }
298 // Try to find block arguments that require an explicit free operation
299 // until we reach a fix point.
300 while (!toProcess.empty()) {
301 auto current = toProcess.pop_back_val();
302 findUnsafeValues(std::get<0>(t&: current), std::get<1>(t&: current));
303 }
304
305 // Update buffer aliases to ensure that we free all buffers and block
306 // arguments at the correct locations.
307 aliases.remove(aliasValues: valuesToFree);
308
309 // Add new allocs and additional clone operations.
310 for (Value value : valuesToFree) {
311 if (failed(result: isa<BlockArgument>(Val: value)
312 ? introduceBlockArgCopy(blockArg: cast<BlockArgument>(Val&: value))
313 : introduceValueCopyForRegionResult(value)))
314 return failure();
315
316 // Register the value to require a final dealloc. Note that we do not have
317 // to assign a block here since we do not want to move the allocation node
318 // to another location.
319 allocs.registerAlloc(entry: std::make_tuple(args&: value, args: nullptr));
320 }
321 return success();
322 }
323
324 /// Introduces temporary clones in all predecessors and copies the source
325 /// values into the newly allocated buffers.
326 LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
327 // Allocate a buffer for the current block argument in the block of
328 // the associated value (which will be a predecessor block by
329 // definition).
330 Block *block = blockArg.getOwner();
331 for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
332 // Get the terminator and the value that will be passed to our
333 // argument.
334 Operation *terminator = (*it)->getTerminator();
335 auto branchInterface = cast<BranchOpInterface>(terminator);
336 SuccessorOperands operands =
337 branchInterface.getSuccessorOperands(it.getSuccessorIndex());
338
339 // Query the associated source value.
340 Value sourceValue = operands[blockArg.getArgNumber()];
341 if (!sourceValue) {
342 return failure();
343 }
344 // Wire new clone and successor operand.
345 // Create a new clone at the current location of the terminator.
346 auto clone = introduceCloneBuffers(sourceValue, terminator);
347 if (failed(clone))
348 return failure();
349 operands.slice(subStart: blockArg.getArgNumber(), subLen: 1).assign(*clone);
350 }
351
352 // Check whether the block argument has implicitly defined predecessors via
353 // the RegionBranchOpInterface. This can be the case if the current block
354 // argument belongs to the first block in a region and the parent operation
355 // implements the RegionBranchOpInterface.
356 Region *argRegion = block->getParent();
357 Operation *parentOp = argRegion->getParentOp();
358 RegionBranchOpInterface regionInterface;
359 if (&argRegion->front() != block ||
360 !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
361 return success();
362
363 if (failed(introduceClonesForRegionSuccessors(
364 regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
365 [&](RegionSuccessor &successorRegion) {
366 // Find a predecessor of our argRegion.
367 return successorRegion.getSuccessor() == argRegion;
368 })))
369 return failure();
370
371 // Check whether the block argument belongs to an entry region of the
372 // parent operation. In this case, we have to introduce an additional clone
373 // for buffer that is passed to the argument.
374 SmallVector<RegionSuccessor, 2> successorRegions;
375 regionInterface.getSuccessorRegions(/*point=*/RegionBranchPoint::parent(),
376 successorRegions);
377 auto *it =
378 llvm::find_if(Range&: successorRegions, P: [&](RegionSuccessor &successorRegion) {
379 return successorRegion.getSuccessor() == argRegion;
380 });
381 if (it == successorRegions.end())
382 return success();
383
384 // Determine the actual operand to introduce a clone for and rewire the
385 // operand to point to the clone instead.
386 auto operands = regionInterface.getEntrySuccessorOperands(argRegion);
387 size_t operandIndex =
388 llvm::find(Range: it->getSuccessorInputs(), Val: blockArg).getIndex() +
389 operands.getBeginOperandIndex();
390 Value operand = parentOp->getOperand(idx: operandIndex);
391 assert(operand ==
392 operands[operandIndex - operands.getBeginOperandIndex()] &&
393 "region interface operands don't match parentOp operands");
394 auto clone = introduceCloneBuffers(sourceValue: operand, terminator: parentOp);
395 if (failed(clone))
396 return failure();
397
398 parentOp->setOperand(idx: operandIndex, value: *clone);
399 return success();
400 }
401
402 /// Introduces temporary clones in front of all associated nested-region
403 /// terminators and copies the source values into the newly allocated buffers.
404 LogicalResult introduceValueCopyForRegionResult(Value value) {
405 // Get the actual result index in the scope of the parent terminator.
406 Operation *operation = value.getDefiningOp();
407 auto regionInterface = cast<RegionBranchOpInterface>(operation);
408 // Filter successors that return to the parent operation.
409 auto regionPredicate = [&](RegionSuccessor &successorRegion) {
410 // If the RegionSuccessor has no associated successor, it will return to
411 // its parent operation.
412 return !successorRegion.getSuccessor();
413 };
414 // Introduce a clone for all region "results" that are returned to the
415 // parent operation. This is required since the parent's result value has
416 // been considered critical. Therefore, the algorithm assumes that a clone
417 // of a previously allocated buffer is returned by the operation (like in
418 // the case of a block argument).
419 return introduceClonesForRegionSuccessors(
420 regionInterface, operation->getRegions(), value, regionPredicate);
421 }
422
423 /// Introduces buffer clones for all terminators in the given regions. The
424 /// regionPredicate is applied to every successor region in order to restrict
425 /// the clones to specific regions.
426 template <typename TPredicate>
427 LogicalResult introduceClonesForRegionSuccessors(
428 RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
429 Value argValue, const TPredicate &regionPredicate) {
430 for (Region &region : regions) {
431 // Query the regionInterface to get all successor regions of the current
432 // one.
433 SmallVector<RegionSuccessor, 2> successorRegions;
434 regionInterface.getSuccessorRegions(region, successorRegions);
435 // Try to find a matching region successor.
436 RegionSuccessor *regionSuccessor =
437 llvm::find_if(successorRegions, regionPredicate);
438 if (regionSuccessor == successorRegions.end())
439 continue;
440 // Get the operand index in the context of the current successor input
441 // bindings.
442 size_t operandIndex =
443 llvm::find(Range: regionSuccessor->getSuccessorInputs(), Val: argValue)
444 .getIndex();
445
446 // Iterate over all immediate terminator operations to introduce
447 // new buffer allocations. Thereby, the appropriate terminator operand
448 // will be adjusted to point to the newly allocated buffer instead.
449 if (failed(walkReturnOperations(
450 &region, [&](RegionBranchTerminatorOpInterface terminator) {
451 // Get the actual mutable operands for this terminator op.
452 auto terminatorOperands =
453 terminator.getMutableSuccessorOperands(*regionSuccessor);
454 // Extract the source value from the current terminator.
455 // This conversion needs to exist on a separate line due to a
456 // bug in GCC conversion analysis.
457 OperandRange immutableTerminatorOperands = terminatorOperands;
458 Value sourceValue = immutableTerminatorOperands[operandIndex];
459 // Create a new clone at the current location of the terminator.
460 auto clone = introduceCloneBuffers(sourceValue, terminator);
461 if (failed(clone))
462 return failure();
463 // Wire clone and terminator operand.
464 terminatorOperands.slice(operandIndex, 1).assign(*clone);
465 return success();
466 })))
467 return failure();
468 }
469 return success();
470 }
471
472 /// Creates a new memory allocation for the given source value and clones
473 /// its content into the newly allocated buffer. The terminator operation is
474 /// used to insert the clone operation at the right place.
475 FailureOr<Value> introduceCloneBuffers(Value sourceValue,
476 Operation *terminator) {
477 // Avoid multiple clones of the same source value. This can happen in the
478 // presence of loops when a branch acts as a backedge while also having
479 // another successor that returns to its parent operation. Note: that
480 // copying copied buffers can introduce memory leaks since the invariant of
481 // BufferDeallocation assumes that a buffer will be only cloned once into a
482 // temporary buffer. Hence, the construction of clone chains introduces
483 // additional allocations that are not tracked automatically by the
484 // algorithm.
485 if (clonedValues.contains(Ptr: sourceValue))
486 return sourceValue;
487 // Create a new clone operation that copies the contents of the old
488 // buffer to the new one.
489 auto clone = buildClone(op: terminator, alloc: sourceValue);
490 if (succeeded(result: clone)) {
491 // Remember the clone of original source value.
492 clonedValues.insert(Ptr: *clone);
493 }
494 return clone;
495 }
496
497 /// Finds correct dealloc positions according to the algorithm described at
498 /// the top of the file for all alloc nodes and block arguments that can be
499 /// handled by this analysis.
500 LogicalResult placeDeallocs() {
501 // Move or insert deallocs using the previously computed information.
502 // These deallocations will be linked to their associated allocation nodes
503 // since they don't have any aliases that can (potentially) increase their
504 // liveness.
505 for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
506 Value alloc = std::get<0>(t: entry);
507 auto aliasesSet = aliases.resolve(value: alloc);
508 assert(!aliasesSet.empty() && "must contain at least one alias");
509
510 // Determine the actual block to place the dealloc and get liveness
511 // information.
512 Block *placementBlock =
513 findCommonDominator(value: alloc, values: aliasesSet, doms: postDominators);
514 const LivenessBlockInfo *livenessInfo =
515 liveness.getLiveness(block: placementBlock);
516
517 // We have to ensure that the dealloc will be after the last use of all
518 // aliases of the given value. We first assume that there are no uses in
519 // the placementBlock and that we can safely place the dealloc at the
520 // beginning.
521 Operation *endOperation = &placementBlock->front();
522
523 // Iterate over all aliases and ensure that the endOperation will point
524 // to the last operation of all potential aliases in the placementBlock.
525 for (Value alias : aliasesSet) {
526 // Ensure that the start operation is at least the defining operation of
527 // the current alias to avoid invalid placement of deallocs for aliases
528 // without any uses.
529 Operation *beforeOp = endOperation;
530 if (alias.getDefiningOp() &&
531 !(beforeOp = placementBlock->findAncestorOpInBlock(
532 op&: *alias.getDefiningOp())))
533 continue;
534
535 Operation *aliasEndOperation =
536 livenessInfo->getEndOperation(value: alias, startOperation: beforeOp);
537 // Check whether the aliasEndOperation lies in the desired block and
538 // whether it is behind the current endOperation. If yes, this will be
539 // the new endOperation.
540 if (aliasEndOperation->getBlock() == placementBlock &&
541 endOperation->isBeforeInBlock(other: aliasEndOperation))
542 endOperation = aliasEndOperation;
543 }
544 // endOperation is the last operation behind which we can safely store
545 // the dealloc taking all potential aliases into account.
546
547 // If there is an existing dealloc, move it to the right place.
548 Operation *deallocOperation = std::get<1>(t: entry);
549 if (deallocOperation) {
550 deallocOperation->moveAfter(existingOp: endOperation);
551 } else {
552 // If the Dealloc position is at the terminator operation of the
553 // block, then the value should escape from a deallocation.
554 Operation *nextOp = endOperation->getNextNode();
555 if (!nextOp)
556 continue;
557 // If there is no dealloc node, insert one in the right place.
558 if (failed(result: buildDealloc(op: nextOp, alloc)))
559 return failure();
560 }
561 }
562 return success();
563 }
564
565 /// Builds a deallocation operation compatible with the given allocation
566 /// value. If there is no registered AllocationOpInterface implementation for
567 /// the given value (e.g. in the case of a function parameter), this method
568 /// builds a memref::DeallocOp.
569 LogicalResult buildDealloc(Operation *op, Value alloc) {
570 OpBuilder builder(op);
571 auto it = aliasToAllocations.find(alloc);
572 if (it != aliasToAllocations.end()) {
573 // Call the allocation op interface to build a supported and
574 // compatible deallocation operation.
575 auto dealloc = it->second.buildDealloc(builder, alloc);
576 if (!dealloc)
577 return op->emitError()
578 << "allocations without compatible deallocations are "
579 "not supported";
580 } else {
581 // Build a "default" DeallocOp for unknown allocation sources.
582 builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
583 }
584 return success();
585 }
586
587 /// Builds a clone operation compatible with the given allocation value. If
588 /// there is no registered AllocationOpInterface implementation for the given
589 /// value (e.g. in the case of a function parameter), this method builds a
590 /// bufferization::CloneOp.
591 FailureOr<Value> buildClone(Operation *op, Value alloc) {
592 OpBuilder builder(op);
593 auto it = aliasToAllocations.find(alloc);
594 if (it != aliasToAllocations.end()) {
595 // Call the allocation op interface to build a supported and
596 // compatible clone operation.
597 auto clone = it->second.buildClone(builder, alloc);
598 if (clone)
599 return *clone;
600 return (LogicalResult)(op->emitError()
601 << "allocations without compatible clone ops "
602 "are not supported");
603 }
604 // Build a "default" CloneOp for unknown allocation sources.
605 return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
606 .getResult();
607 }
608
609 /// The dominator info to find the appropriate start operation to move the
610 /// allocs.
611 DominanceInfo dominators;
612
613 /// The post dominator info to move the dependent allocs in the right
614 /// position.
615 PostDominanceInfo postDominators;
616
617 /// Stores already cloned buffers to avoid additional clones of clones.
618 ValueSetT clonedValues;
619
620 /// Maps aliases to their source allocation interfaces (inverse mapping).
621 AliasAllocationMapT aliasToAllocations;
622};
623
624//===----------------------------------------------------------------------===//
625// BufferDeallocationPass
626//===----------------------------------------------------------------------===//
627
628/// The actual buffer deallocation pass that inserts and moves dealloc nodes
629/// into the right positions. Furthermore, it inserts additional clones if
630/// necessary. It uses the algorithm described at the top of the file.
631struct BufferDeallocationPass
632 : public bufferization::impl::BufferDeallocationBase<
633 BufferDeallocationPass> {
634 void getDependentDialects(DialectRegistry &registry) const override {
635 registry.insert<bufferization::BufferizationDialect>();
636 registry.insert<memref::MemRefDialect>();
637 }
638
639 void runOnOperation() override {
640 func::FuncOp func = getOperation();
641 if (func.isExternal())
642 return;
643
644 if (failed(deallocateBuffers(func)))
645 signalPassFailure();
646 }
647};
648
649} // namespace
650
651LogicalResult bufferization::deallocateBuffers(Operation *op) {
652 if (isa<ModuleOp>(Val: op)) {
653 WalkResult result = op->walk([&](func::FuncOp funcOp) {
654 if (failed(deallocateBuffers(funcOp)))
655 return WalkResult::interrupt();
656 return WalkResult::advance();
657 });
658 return success(isSuccess: !result.wasInterrupted());
659 }
660
661 // Ensure that there are supported loops only.
662 Backedges backedges(op);
663 if (backedges.size()) {
664 op->emitError(message: "Only structured control-flow loops are supported.");
665 return failure();
666 }
667
668 // Check that the control flow structures are supported.
669 if (!validateSupportedControlFlow(op))
670 return failure();
671
672 // Gather all required allocation nodes and prepare the deallocation phase.
673 BufferDeallocation deallocation(op);
674
675 // Check for supported AllocationOpInterface implementations and prepare the
676 // internal deallocation pass.
677 if (failed(result: deallocation.prepare()))
678 return failure();
679
680 // Place all required temporary clone and dealloc nodes.
681 if (failed(result: deallocation.deallocate()))
682 return failure();
683
684 return success();
685}
686
687//===----------------------------------------------------------------------===//
688// BufferDeallocationPass construction
689//===----------------------------------------------------------------------===//
690
691std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
692 return std::make_unique<BufferDeallocationPass>();
693}
694

source code of mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp