BufferDeallocation.cpp source code [mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp]

1	//===- BufferDeallocation.cpp - the impl for buffer deallocation ----------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements logic for computing correct alloc and dealloc positions.
10	// Furthermore, buffer deallocation also adds required new clone operations to
11	// ensure that all buffers are deallocated. The main class is the
12	// BufferDeallocationPass class that implements the underlying algorithm. In
13	// order to put allocations and deallocations at safe positions, it is
14	// significantly important to put them into the correct blocks. However, the
15	// liveness analysis does not pay attention to aliases, which can occur due to
16	// branches (and their associated block arguments) in general. For this purpose,
17	// BufferDeallocation firstly finds all possible aliases for a single value
18	// (using the BufferViewFlowAnalysis class). Consider the following example:
19	//
20	// ^bb0(%arg0):
21	// cf.cond_br %cond, ^bb1, ^bb2
22	// ^bb1:
23	// cf.br ^exit(%arg0)
24	// ^bb2:
25	// %new_value = ...
26	// cf.br ^exit(%new_value)
27	// ^exit(%arg1):
28	// return %arg1;
29	//
30	// We should place the dealloc for %new_value in exit. However, we have to free
31	// the buffer in the same block, because it cannot be freed in the post
32	// dominator. However, this requires a new clone buffer for %arg1 that will
33	// contain the actual contents. Using the class BufferViewFlowAnalysis, we
34	// will find out that %new_value has a potential alias %arg1. In order to find
35	// the dealloc position we have to find all potential aliases, iterate over
36	// their uses and find the common post-dominator block (note that additional
37	// clones and buffers remove potential aliases and will influence the placement
38	// of the deallocs). In all cases, the computed block can be safely used to free
39	// the %new_value buffer (may be exit or bb2) as it will die and we can use
40	// liveness information to determine the exact operation after which we have to
41	// insert the dealloc. However, the algorithm supports introducing clone buffers
42	// and placing deallocs in safe locations to ensure that all buffers will be
43	// freed in the end.
44	//
45	// TODO:
46	// The current implementation does not support explicit-control-flow loops and
47	// the resulting code will be invalid with respect to program semantics.
48	// However, structured control-flow loops are fully supported. Furthermore, it
49	// doesn't accept functions which return buffers already.
50	//
51	//===----------------------------------------------------------------------===//
52
53	#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
54
55	#include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
56	#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
57	#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
58	#include "mlir/Dialect/Func/IR/FuncOps.h"
59	#include "mlir/Dialect/MemRef/IR/MemRef.h"
60	#include "llvm/ADT/SetOperations.h"
61
62	namespace mlir {
63	namespace bufferization {
64	#define GEN_PASS_DEF_BUFFERDEALLOCATION
65	#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
66	} // namespace bufferization
67	} // namespace mlir
68
69	using namespace mlir;
70	using namespace mlir::bufferization;
71
72	/// Walks over all immediate return-like terminators in the given region.
73	static LogicalResult walkReturnOperations(
74	Region *region,
75	llvm::function_ref<LogicalResult(RegionBranchTerminatorOpInterface)> func) {
76	for (Block &block : *region) {
77	Operation *terminator = block.getTerminator();
78	// Skip non region-return-like terminators.
79	if (auto regionTerminator =
80	dyn_cast<RegionBranchTerminatorOpInterface>(terminator)) {
81	if (failed(func(regionTerminator)))
82	return failure();
83	}
84	}
85	return success();
86	}
87
88	/// Checks if all operations that have at least one attached region implement
89	/// the RegionBranchOpInterface. This is not required in edge cases, where we
90	/// have a single attached region and the parent operation has no results.
91	static bool validateSupportedControlFlow(Operation *op) {
92	WalkResult result = op->walk(callback: [&](Operation *operation) {
93	// Only check ops that are inside a function.
94	if (!operation->getParentOfType<func::FuncOp>())
95	return WalkResult::advance();
96
97	auto regions = operation->getRegions();
98	// Walk over all operations in a region and check if the operation has at
99	// least one region and implements the RegionBranchOpInterface. If there
100	// is an operation that does not fulfill this condition, we cannot apply
101	// the deallocation steps. Furthermore, we accept cases, where we have a
102	// region that returns no results, since, in that case, the intra-region
103	// control flow does not affect the transformation.
104	size_t size = regions.size();
105	if (((size == `1` && !operation->getResults().empty()) \|\| size > `1`) &&
106	!dyn_cast<RegionBranchOpInterface>(operation)) {
107	operation->emitError(message: "All operations with attached regions need to "
108	"implement the RegionBranchOpInterface.");
109	}
110
111	return WalkResult::advance();
112	});
113	return !result.wasSkipped();
114	}
115
116	namespace {
117
118	//===----------------------------------------------------------------------===//
119	// Backedges analysis
120	//===----------------------------------------------------------------------===//
121
122	/// A straight-forward program analysis which detects loop backedges induced by
123	/// explicit control flow.
124	class Backedges {
125	public:
126	using BlockSetT = SmallPtrSet<Block *, `16`>;
127	using BackedgeSetT = llvm::DenseSet<std::pair<Block , Block >>;
128
129	public:
130	/// Constructs a new backedges analysis using the op provided.
131	Backedges(Operation *op) { recurse(op); }
132
133	/// Returns the number of backedges formed by explicit control flow.
134	size_t size() const { return edgeSet.size(); }
135
136	/// Returns the start iterator to loop over all backedges.
137	BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
138
139	/// Returns the end iterator to loop over all backedges.
140	BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
141
142	private:
143	/// Enters the current block and inserts a backedge into the `edgeSet` if we
144	/// have already visited the current block. The inserted edge links the given
145	/// `predecessor` with the `current` block.
146	bool enter(Block &current, Block *predecessor) {
147	bool inserted = visited.insert(Ptr: &current).second;
148	if (!inserted)
149	edgeSet.insert(V: std::make_pair(x&: predecessor, y: &current));
150	return inserted;
151	}
152
153	/// Leaves the current block.
154	void exit(Block &current) { visited.erase(Ptr: &current); }
155
156	/// Recurses into the given operation while taking all attached regions into
157	/// account.
158	void recurse(Operation *op) {
159	Block *current = op->getBlock();
160	// If the current op implements the `BranchOpInterface`, there can be
161	// cycles in the scope of all successor blocks.
162	if (isa<BranchOpInterface>(Val: op)) {
163	for (Block *succ : current->getSuccessors())
164	recurse(block&: *succ, predecessor: current);
165	}
166	// Recurse into all distinct regions and check for explicit control-flow
167	// loops.
168	for (Region &region : op->getRegions()) {
169	if (!region.empty())
170	recurse(block&: region.front(), predecessor: current);
171	}
172	}
173
174	/// Recurses into explicit control-flow structures that are given by
175	/// the successor relation defined on the block level.
176	void recurse(Block &block, Block *predecessor) {
177	// Try to enter the current block. If this is not possible, we are
178	// currently processing this block and can safely return here.
179	if (!enter(current&: block, predecessor))
180	return;
181
182	// Recurse into all operations and successor blocks.
183	for (Operation &op : block.getOperations())
184	recurse(op: &op);
185
186	// Leave the current block.
187	exit(current&: block);
188	}
189
190	/// Stores all blocks that are currently visited and on the processing stack.
191	BlockSetT visited;
192
193	/// Stores all backedges in the format (source, target).
194	BackedgeSetT edgeSet;
195	};
196
197	//===----------------------------------------------------------------------===//
198	// BufferDeallocation
199	//===----------------------------------------------------------------------===//
200
201	/// The buffer deallocation transformation which ensures that all allocs in the
202	/// program have a corresponding de-allocation. As a side-effect, it might also
203	/// introduce clones that in turn leads to additional deallocations.
204	class BufferDeallocation : public BufferPlacementTransformationBase {
205	public:
206	using AliasAllocationMapT =
207	llvm::DenseMap<Value, bufferization::AllocationOpInterface>;
208
209	BufferDeallocation(Operation *op)
210	: BufferPlacementTransformationBase (op), dominators (op),
211	postDominators (op) {}
212
213	/// Checks if all allocation operations either provide an already existing
214	/// deallocation operation or implement the AllocationOpInterface. In
215	/// addition, this method initializes the internal alias to
216	/// AllocationOpInterface mapping in order to get compatible
217	/// AllocationOpInterface implementations for aliases.
218	LogicalResult prepare() {
219	for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
220	// Get the defining allocation operation.
221	Value alloc = std::get<`0`>(t: entry);
222	auto allocationInterface =
223	alloc.getDefiningOp<bufferization::AllocationOpInterface>();
224	// If there is no existing deallocation operation and no implementation of
225	// the AllocationOpInterface, we cannot apply the BufferDeallocation pass.
226	if (!std::get<`1`>(t: entry) && !allocationInterface) {
227	return alloc.getDefiningOp()->emitError(
228	message: "Allocation is not deallocated explicitly nor does the operation "
229	"implement the AllocationOpInterface.");
230	}
231
232	// Register the current allocation interface implementation.
233	aliasToAllocations[alloc] = allocationInterface;
234
235	// Get the alias information for the current allocation node.
236	for (Value alias : aliases.resolve(value: alloc)) {
237	// TODO: check for incompatible implementations of the
238	// AllocationOpInterface. This could be realized by promoting the
239	// AllocationOpInterface to a DialectInterface.
240	aliasToAllocations[alias] = allocationInterface;
241	}
242	}
243	return success();
244	}
245
246	/// Performs the actual placement/creation of all temporary clone and dealloc
247	/// nodes.
248	LogicalResult deallocate() {
249	// Add additional clones that are required.
250	if (failed(result: introduceClones()))
251	return failure();
252
253	// Place deallocations for all allocation entries.
254	return placeDeallocs();
255	}
256
257	private:
258	/// Introduces required clone operations to avoid memory leaks.
259	LogicalResult introduceClones() {
260	// Initialize the set of values that require a dedicated memory free
261	// operation since their operands cannot be safely deallocated in a post
262	// dominator.
263	SetVector<Value> valuesToFree;
264	llvm::SmallDenseSet<std::tuple<Value, Block *>> visitedValues;
265	SmallVector<std::tuple<Value, Block *>, `8`> toProcess;
266
267	// Check dominance relation for proper dominance properties. If the given
268	// value node does not dominate an alias, we will have to create a clone in
269	// order to free all buffers that can potentially leak into a post
270	// dominator.
271	auto findUnsafeValues = [&](Value source, Block *definingBlock) {
272	auto it = aliases.find(value: source);
273	if (it == aliases.end())
274	return;
275	for (Value value : it ->second) {
276	if (valuesToFree.count(key: value) > `0`)
277	continue;
278	Block *parentBlock = value.getParentBlock();
279	// Check whether we have to free this particular block argument or
280	// generic value. We have to free the current alias if it is either
281	// defined in a non-dominated block or it is defined in the same block
282	// but the current value is not dominated by the source value.
283	if (!dominators.dominates(a: definingBlock, b: parentBlock) \|\|
284	(definingBlock == parentBlock && isa<BlockArgument>(Val: value))) {
285	toProcess.emplace_back(Args&: value, Args&: parentBlock);
286	valuesToFree.insert(X: value);
287	} else if (visitedValues.insert(V: std::make_tuple(args&: value, args&: definingBlock))
288	.second)
289	toProcess.emplace_back(Args&: value, Args&: definingBlock);
290	}
291	};
292
293	// Detect possibly unsafe aliases starting from all allocations.
294	for (BufferPlacementAllocs::AllocEntry &entry : allocs) {
295	Value allocValue = std::get<`0`>(t&: entry);
296	findUnsafeValues(allocValue, allocValue.getDefiningOp()->getBlock());
297	}
298	// Try to find block arguments that require an explicit free operation
299	// until we reach a fix point.
300	while (!toProcess.empty()) {
301	auto current = toProcess.pop_back_val();
302	findUnsafeValues(std::get<`0`>(t&: current), std::get<`1`>(t&: current));
303	}
304
305	// Update buffer aliases to ensure that we free all buffers and block
306	// arguments at the correct locations.
307	aliases.remove(aliasValues: valuesToFree);
308
309	// Add new allocs and additional clone operations.
310	for (Value value : valuesToFree) {
311	if (failed(result: isa<BlockArgument>(Val: value)
312	? introduceBlockArgCopy(blockArg: cast<BlockArgument>(Val&: value))
313	: introduceValueCopyForRegionResult(value)))
314	return failure();
315
316	// Register the value to require a final dealloc. Note that we do not have
317	// to assign a block here since we do not want to move the allocation node
318	// to another location.
319	allocs.registerAlloc(entry: std::make_tuple(args&: value, args: nullptr));
320	}
321	return success();
322	}
323
324	/// Introduces temporary clones in all predecessors and copies the source
325	/// values into the newly allocated buffers.
326	LogicalResult introduceBlockArgCopy(BlockArgument blockArg) {
327	// Allocate a buffer for the current block argument in the block of
328	// the associated value (which will be a predecessor block by
329	// definition).
330	Block *block = blockArg.getOwner();
331	for (auto it = block->pred_begin(), e = block->pred_end(); it != e; ++it) {
332	// Get the terminator and the value that will be passed to our
333	// argument.
334	Operation terminator = (it)->getTerminator();
335	auto branchInterface = cast<BranchOpInterface>(terminator);
336	SuccessorOperands operands =
337	branchInterface.getSuccessorOperands(it.getSuccessorIndex());
338
339	// Query the associated source value.
340	Value sourceValue = operands [blockArg.getArgNumber()];
341	if (!sourceValue) {
342	return failure();
343	}
344	// Wire new clone and successor operand.
345	// Create a new clone at the current location of the terminator.
346	auto clone = introduceCloneBuffers(sourceValue, terminator);
347	if (failed(clone))
348	return failure();
349	operands.slice(subStart: blockArg.getArgNumber(), subLen: `1`).assign(*clone);
350	}
351
352	// Check whether the block argument has implicitly defined predecessors via
353	// the RegionBranchOpInterface. This can be the case if the current block
354	// argument belongs to the first block in a region and the parent operation
355	// implements the RegionBranchOpInterface.
356	Region *argRegion = block->getParent();
357	Operation *parentOp = argRegion->getParentOp();
358	RegionBranchOpInterface regionInterface;
359	if (&argRegion->front() != block \|\|
360	!(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
361	return success();
362
363	if (failed(introduceClonesForRegionSuccessors(
364	regionInterface, argRegion->getParentOp()->getRegions(), blockArg,
365	[&](RegionSuccessor &successorRegion) {
366	// Find a predecessor of our argRegion.
367	return successorRegion.getSuccessor() == argRegion;
368	})))
369	return failure();
370
371	// Check whether the block argument belongs to an entry region of the
372	// parent operation. In this case, we have to introduce an additional clone
373	// for buffer that is passed to the argument.
374	SmallVector<RegionSuccessor, `2`> successorRegions;
375	regionInterface.getSuccessorRegions(/point=/RegionBranchPoint::parent(),
376	successorRegions);
377	auto *it =
378	llvm::find_if(Range&: successorRegions, P: [&](RegionSuccessor &successorRegion) {
379	return successorRegion.getSuccessor() == argRegion;
380	});
381	if (it == successorRegions.end())
382	return success();
383
384	// Determine the actual operand to introduce a clone for and rewire the
385	// operand to point to the clone instead.
386	auto operands = regionInterface.getEntrySuccessorOperands(argRegion);
387	size_t operandIndex =
388	llvm::find(Range: it->getSuccessorInputs(), Val: blockArg).getIndex() +
389	operands.getBeginOperandIndex();
390	Value operand = parentOp->getOperand(idx: operandIndex);
391	assert(operand ==
392	operands[operandIndex - operands.getBeginOperandIndex()] &&
393	"region interface operands don't match parentOp operands");
394	auto clone = introduceCloneBuffers(sourceValue: operand, terminator: parentOp);
395	if (failed(clone))
396	return failure();
397
398	parentOp->setOperand(idx: operandIndex, value: *clone);
399	return success();
400	}
401
402	/// Introduces temporary clones in front of all associated nested-region
403	/// terminators and copies the source values into the newly allocated buffers.
404	LogicalResult introduceValueCopyForRegionResult(Value value) {
405	// Get the actual result index in the scope of the parent terminator.
406	Operation *operation = value.getDefiningOp();
407	auto regionInterface = cast<RegionBranchOpInterface>(operation);
408	// Filter successors that return to the parent operation.
409	auto regionPredicate = [&](RegionSuccessor &successorRegion) {
410	// If the RegionSuccessor has no associated successor, it will return to
411	// its parent operation.
412	return !successorRegion.getSuccessor();
413	};
414	// Introduce a clone for all region "results" that are returned to the
415	// parent operation. This is required since the parent's result value has
416	// been considered critical. Therefore, the algorithm assumes that a clone
417	// of a previously allocated buffer is returned by the operation (like in
418	// the case of a block argument).
419	return introduceClonesForRegionSuccessors(
420	regionInterface, operation->getRegions(), value, regionPredicate);
421	}
422
423	/// Introduces buffer clones for all terminators in the given regions. The
424	/// regionPredicate is applied to every successor region in order to restrict
425	/// the clones to specific regions.
426	template <typename TPredicate>
427	LogicalResult introduceClonesForRegionSuccessors(
428	RegionBranchOpInterface regionInterface, MutableArrayRef<Region> regions,
429	Value argValue, const TPredicate &regionPredicate) {
430	for (Region &region : regions) {
431	// Query the regionInterface to get all successor regions of the current
432	// one.
433	SmallVector<RegionSuccessor, `2`> successorRegions;
434	regionInterface.getSuccessorRegions(region, successorRegions);
435	// Try to find a matching region successor.
436	RegionSuccessor *regionSuccessor =
437	llvm::find_if(successorRegions, regionPredicate);
438	if (regionSuccessor == successorRegions.end())
439	continue;
440	// Get the operand index in the context of the current successor input
441	// bindings.
442	size_t operandIndex =
443	llvm::find(Range: regionSuccessor->getSuccessorInputs(), Val: argValue)
444	.getIndex();
445
446	// Iterate over all immediate terminator operations to introduce
447	// new buffer allocations. Thereby, the appropriate terminator operand
448	// will be adjusted to point to the newly allocated buffer instead.
449	if (failed(walkReturnOperations(
450	&region, [&](RegionBranchTerminatorOpInterface terminator) {
451	// Get the actual mutable operands for this terminator op.
452	auto terminatorOperands =
453	terminator.getMutableSuccessorOperands(*regionSuccessor);
454	// Extract the source value from the current terminator.
455	// This conversion needs to exist on a separate line due to a
456	// bug in GCC conversion analysis.
457	OperandRange immutableTerminatorOperands = terminatorOperands;
458	Value sourceValue = immutableTerminatorOperands[operandIndex];
459	// Create a new clone at the current location of the terminator.
460	auto clone = introduceCloneBuffers(sourceValue, terminator);
461	if (failed(clone))
462	return failure();
463	// Wire clone and terminator operand.
464	terminatorOperands.slice(operandIndex, `1`).assign(*clone);
465	return success();
466	})))
467	return failure();
468	}
469	return success();
470	}
471
472	/// Creates a new memory allocation for the given source value and clones
473	/// its content into the newly allocated buffer. The terminator operation is
474	/// used to insert the clone operation at the right place.
475	FailureOr<Value> introduceCloneBuffers(Value sourceValue,
476	Operation *terminator) {
477	// Avoid multiple clones of the same source value. This can happen in the
478	// presence of loops when a branch acts as a backedge while also having
479	// another successor that returns to its parent operation. Note: that
480	// copying copied buffers can introduce memory leaks since the invariant of
481	// BufferDeallocation assumes that a buffer will be only cloned once into a
482	// temporary buffer. Hence, the construction of clone chains introduces
483	// additional allocations that are not tracked automatically by the
484	// algorithm.
485	if (clonedValues.contains(Ptr: sourceValue))
486	return sourceValue;
487	// Create a new clone operation that copies the contents of the old
488	// buffer to the new one.
489	auto clone = buildClone(op: terminator, alloc: sourceValue);
490	if (succeeded(result: clone)) {
491	// Remember the clone of original source value.
492	clonedValues.insert(Ptr: *clone);
493	}
494	return clone;
495	}
496
497	/// Finds correct dealloc positions according to the algorithm described at
498	/// the top of the file for all alloc nodes and block arguments that can be
499	/// handled by this analysis.
500	LogicalResult placeDeallocs() {
501	// Move or insert deallocs using the previously computed information.
502	// These deallocations will be linked to their associated allocation nodes
503	// since they don't have any aliases that can (potentially) increase their
504	// liveness.
505	for (const BufferPlacementAllocs::AllocEntry &entry : allocs) {
506	Value alloc = std::get<`0`>(t: entry);
507	auto aliasesSet = aliases.resolve(value: alloc);
508	assert(!aliasesSet.empty() && "must contain at least one alias");
509
510	// Determine the actual block to place the dealloc and get liveness
511	// information.
512	Block *placementBlock =
513	findCommonDominator(value: alloc, values: aliasesSet, doms: postDominators);
514	const LivenessBlockInfo *livenessInfo =
515	liveness.getLiveness(block: placementBlock);
516
517	// We have to ensure that the dealloc will be after the last use of all
518	// aliases of the given value. We first assume that there are no uses in
519	// the placementBlock and that we can safely place the dealloc at the
520	// beginning.
521	Operation *endOperation = &placementBlock->front();
522
523	// Iterate over all aliases and ensure that the endOperation will point
524	// to the last operation of all potential aliases in the placementBlock.
525	for (Value alias : aliasesSet) {
526	// Ensure that the start operation is at least the defining operation of
527	// the current alias to avoid invalid placement of deallocs for aliases
528	// without any uses.
529	Operation *beforeOp = endOperation;
530	if (alias.getDefiningOp() &&
531	!(beforeOp = placementBlock->findAncestorOpInBlock(
532	op&: *alias.getDefiningOp())))
533	continue;
534
535	Operation *aliasEndOperation =
536	livenessInfo->getEndOperation(value: alias, startOperation: beforeOp);
537	// Check whether the aliasEndOperation lies in the desired block and
538	// whether it is behind the current endOperation. If yes, this will be
539	// the new endOperation.
540	if (aliasEndOperation->getBlock() == placementBlock &&
541	endOperation->isBeforeInBlock(other: aliasEndOperation))
542	endOperation = aliasEndOperation;
543	}
544	// endOperation is the last operation behind which we can safely store
545	// the dealloc taking all potential aliases into account.
546
547	// If there is an existing dealloc, move it to the right place.
548	Operation *deallocOperation = std::get<`1`>(t: entry);
549	if (deallocOperation) {
550	deallocOperation->moveAfter(existingOp: endOperation);
551	} else {
552	// If the Dealloc position is at the terminator operation of the
553	// block, then the value should escape from a deallocation.
554	Operation *nextOp = endOperation->getNextNode();
555	if (!nextOp)
556	continue;
557	// If there is no dealloc node, insert one in the right place.
558	if (failed(result: buildDealloc(op: nextOp, alloc)))
559	return failure();
560	}
561	}
562	return success();
563	}
564
565	/// Builds a deallocation operation compatible with the given allocation
566	/// value. If there is no registered AllocationOpInterface implementation for
567	/// the given value (e.g. in the case of a function parameter), this method
568	/// builds a memref::DeallocOp.
569	LogicalResult buildDealloc(Operation *op, Value alloc) {
570	OpBuilder builder(op);
571	auto it = aliasToAllocations.find(alloc);
572	if (it != aliasToAllocations.end()) {
573	// Call the allocation op interface to build a supported and
574	// compatible deallocation operation.
575	auto dealloc = it->second.buildDealloc(builder, alloc);
576	if (!dealloc)
577	return op->emitError()
578	<< "allocations without compatible deallocations are "
579	"not supported";
580	} else {
581	// Build a "default" DeallocOp for unknown allocation sources.
582	builder.create<memref::DeallocOp>(alloc.getLoc(), alloc);
583	}
584	return success();
585	}
586
587	/// Builds a clone operation compatible with the given allocation value. If
588	/// there is no registered AllocationOpInterface implementation for the given
589	/// value (e.g. in the case of a function parameter), this method builds a
590	/// bufferization::CloneOp.
591	FailureOr<Value> buildClone(Operation *op, Value alloc) {
592	OpBuilder builder(op);
593	auto it = aliasToAllocations.find(alloc);
594	if (it != aliasToAllocations.end()) {
595	// Call the allocation op interface to build a supported and
596	// compatible clone operation.
597	auto clone = it->second.buildClone(builder, alloc);
598	if (clone)
599	return *clone;
600	return (LogicalResult)(op->emitError()
601	<< "allocations without compatible clone ops "
602	"are not supported");
603	}
604	// Build a "default" CloneOp for unknown allocation sources.
605	return builder.create<bufferization::CloneOp>(alloc.getLoc(), alloc)
606	.getResult();
607	}
608
609	/// The dominator info to find the appropriate start operation to move the
610	/// allocs.
611	DominanceInfo dominators;
612
613	/// The post dominator info to move the dependent allocs in the right
614	/// position.
615	PostDominanceInfo postDominators;
616
617	/// Stores already cloned buffers to avoid additional clones of clones.
618	ValueSetT clonedValues;
619
620	/// Maps aliases to their source allocation interfaces (inverse mapping).
621	AliasAllocationMapT aliasToAllocations;
622	};
623
624	//===----------------------------------------------------------------------===//
625	// BufferDeallocationPass
626	//===----------------------------------------------------------------------===//
627
628	/// The actual buffer deallocation pass that inserts and moves dealloc nodes
629	/// into the right positions. Furthermore, it inserts additional clones if
630	/// necessary. It uses the algorithm described at the top of the file.
631	struct BufferDeallocationPass
632	: public bufferization::impl::BufferDeallocationBase<
633	BufferDeallocationPass> {
634	void getDependentDialects(DialectRegistry &registry) const override {
635	registry.insert<bufferization::BufferizationDialect>();
636	registry.insert<memref::MemRefDialect>();
637	}
638
639	void runOnOperation() override {
640	func::FuncOp func = getOperation();
641	if (func.isExternal())
642	return;
643
644	if (failed(deallocateBuffers(func)))
645	signalPassFailure();
646	}
647	};
648
649	} // namespace
650
651	LogicalResult bufferization::deallocateBuffers(Operation *op) {
652	if (isa<ModuleOp>(Val: op)) {
653	WalkResult result = op->walk([&](func::FuncOp funcOp) {
654	if (failed(deallocateBuffers(funcOp)))
655	return WalkResult::interrupt();
656	return WalkResult::advance();
657	});
658	return success(isSuccess: !result.wasInterrupted());
659	}
660
661	// Ensure that there are supported loops only.
662	Backedges backedges(op);
663	if (backedges.size()) {
664	op->emitError(message: "Only structured control-flow loops are supported.");
665	return failure();
666	}
667
668	// Check that the control flow structures are supported.
669	if (!validateSupportedControlFlow(op))
670	return failure();
671
672	// Gather all required allocation nodes and prepare the deallocation phase.
673	BufferDeallocation deallocation(op);
674
675	// Check for supported AllocationOpInterface implementations and prepare the
676	// internal deallocation pass.
677	if (failed(result: deallocation.prepare()))
678	return failure();
679
680	// Place all required temporary clone and dealloc nodes.
681	if (failed(result: deallocation.deallocate()))
682	return failure();
683
684	return success();
685	}
686
687	//===----------------------------------------------------------------------===//
688	// BufferDeallocationPass construction
689	//===----------------------------------------------------------------------===//
690
691	std::unique_ptr<Pass> mlir::bufferization::createBufferDeallocationPass() {
692	return std::make_unique<BufferDeallocationPass>();
693	}
694

source code of mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp