1//===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements logic for computing correct `bufferization.dealloc`
10// positions. Furthermore, buffer deallocation also adds required new clone
11// operations to ensure that memrefs returned by functions never alias an
12// argument.
13//
14// TODO:
15// The current implementation does not support explicit-control-flow loops and
16// the resulting code will be invalid with respect to program semantics.
17// However, structured control-flow loops are fully supported.
18//
19//===----------------------------------------------------------------------===//
20
21#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
22#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
23#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
24#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
25#include "mlir/Dialect/Func/IR/FuncOps.h"
26#include "mlir/Dialect/MemRef/IR/MemRef.h"
27#include "mlir/Dialect/SCF/IR/SCF.h"
28#include "mlir/IR/Iterators.h"
29#include "mlir/Interfaces/ControlFlowInterfaces.h"
30
31namespace mlir {
32namespace bufferization {
33#define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATIONPASS
34#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
35} // namespace bufferization
36} // namespace mlir
37
38using namespace mlir;
39using namespace mlir::bufferization;
40
41//===----------------------------------------------------------------------===//
42// Helpers
43//===----------------------------------------------------------------------===//
44
45static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
46 return builder.create<arith::ConstantOp>(loc, builder.getBoolAttr(value));
47}
48
49static bool isMemref(Value v) { return isa<BaseMemRefType>(Val: v.getType()); }
50
51/// Return "true" if the given op is guaranteed to have neither "Allocate" nor
52/// "Free" side effects.
53static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
54 if (isa<MemoryEffectOpInterface>(op))
55 return !hasEffect<MemoryEffects::Allocate>(op) &&
56 !hasEffect<MemoryEffects::Free>(op);
57 // If the op does not implement the MemoryEffectOpInterface but has has
58 // recursive memory effects, then this op in isolation (without its body) does
59 // not have any side effects. All the ops inside the regions of this op will
60 // be processed separately.
61 return op->hasTrait<OpTrait::HasRecursiveMemoryEffects>();
62}
63
64/// Return "true" if the given op has buffer semantics. I.e., it has buffer
65/// operands, buffer results and/or buffer region entry block arguments.
66static bool hasBufferSemantics(Operation *op) {
67 if (llvm::any_of(Range: op->getOperands(), P: isMemref) ||
68 llvm::any_of(Range: op->getResults(), P: isMemref))
69 return true;
70 for (Region &region : op->getRegions())
71 if (!region.empty())
72 if (llvm::any_of(Range: region.front().getArguments(), P: isMemref))
73 return true;
74 return false;
75}
76
77//===----------------------------------------------------------------------===//
78// Backedges analysis
79//===----------------------------------------------------------------------===//
80
81namespace {
82
83/// A straight-forward program analysis which detects loop backedges induced by
84/// explicit control flow.
85class Backedges {
86public:
87 using BlockSetT = SmallPtrSet<Block *, 16>;
88 using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
89
90public:
91 /// Constructs a new backedges analysis using the op provided.
92 Backedges(Operation *op) { recurse(op); }
93
94 /// Returns the number of backedges formed by explicit control flow.
95 size_t size() const { return edgeSet.size(); }
96
97 /// Returns the start iterator to loop over all backedges.
98 BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
99
100 /// Returns the end iterator to loop over all backedges.
101 BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
102
103private:
104 /// Enters the current block and inserts a backedge into the `edgeSet` if we
105 /// have already visited the current block. The inserted edge links the given
106 /// `predecessor` with the `current` block.
107 bool enter(Block &current, Block *predecessor) {
108 bool inserted = visited.insert(Ptr: &current).second;
109 if (!inserted)
110 edgeSet.insert(V: std::make_pair(x&: predecessor, y: &current));
111 return inserted;
112 }
113
114 /// Leaves the current block.
115 void exit(Block &current) { visited.erase(Ptr: &current); }
116
117 /// Recurses into the given operation while taking all attached regions into
118 /// account.
119 void recurse(Operation *op) {
120 Block *current = op->getBlock();
121 // If the current op implements the `BranchOpInterface`, there can be
122 // cycles in the scope of all successor blocks.
123 if (isa<BranchOpInterface>(Val: op)) {
124 for (Block *succ : current->getSuccessors())
125 recurse(block&: *succ, predecessor: current);
126 }
127 // Recurse into all distinct regions and check for explicit control-flow
128 // loops.
129 for (Region &region : op->getRegions()) {
130 if (!region.empty())
131 recurse(block&: region.front(), predecessor: current);
132 }
133 }
134
135 /// Recurses into explicit control-flow structures that are given by
136 /// the successor relation defined on the block level.
137 void recurse(Block &block, Block *predecessor) {
138 // Try to enter the current block. If this is not possible, we are
139 // currently processing this block and can safely return here.
140 if (!enter(current&: block, predecessor))
141 return;
142
143 // Recurse into all operations and successor blocks.
144 for (Operation &op : block.getOperations())
145 recurse(op: &op);
146
147 // Leave the current block.
148 exit(current&: block);
149 }
150
151 /// Stores all blocks that are currently visited and on the processing stack.
152 BlockSetT visited;
153
154 /// Stores all backedges in the format (source, target).
155 BackedgeSetT edgeSet;
156};
157
158} // namespace
159
160//===----------------------------------------------------------------------===//
161// BufferDeallocation
162//===----------------------------------------------------------------------===//
163
164namespace {
165/// The buffer deallocation transformation which ensures that all allocs in the
166/// program have a corresponding de-allocation.
167class BufferDeallocation {
168public:
169 BufferDeallocation(Operation *op, DeallocationOptions options,
170 SymbolTableCollection &symbolTables)
171 : state(op, symbolTables), options(options) {}
172
173 /// Performs the actual placement/creation of all dealloc operations.
174 LogicalResult deallocate(FunctionOpInterface op);
175
176private:
177 /// The base case for the recursive template below.
178 template <typename... T>
179 typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
180 handleOp(Operation *op) {
181 return op;
182 }
183
184 /// Applies all the handlers of the interfaces in the template list
185 /// implemented by 'op'. In particular, if an operation implements more than
186 /// one of the interfaces in the template list, all the associated handlers
187 /// will be applied to the operation in the same order as the template list
188 /// specifies. If a handler reports a failure or removes the operation without
189 /// replacement (indicated by returning 'nullptr'), no further handlers are
190 /// applied and the return value is propagated to the caller of 'handleOp'.
191 ///
192 /// The interface handlers job is to update the deallocation state, most
193 /// importantly the ownership map and list of memrefs to potentially be
194 /// deallocated per block, but also to insert `bufferization.dealloc`
195 /// operations where needed. Obviously, no MemRefs that may be used at a later
196 /// point in the control-flow may be deallocated and the ownership map has to
197 /// be updated to reflect potential ownership changes caused by the dealloc
198 /// operation (e.g., if two interfaces on the same op insert a dealloc
199 /// operation each, the second one should query the ownership map and use them
200 /// as deallocation condition such that MemRefs already deallocated in the
201 /// first dealloc operation are not deallocated a second time (double-free)).
202 /// Note that currently only the interfaces on terminators may insert dealloc
203 /// operations and it is verified as a precondition that a terminator op must
204 /// implement exactly one of the interfaces handling dealloc insertion.
205 ///
206 /// The return value of the 'handleInterface' functions should be a
207 /// FailureOr<Operation *> indicating whether there was a failure or otherwise
208 /// returning the operation itself or a replacement operation.
209 ///
210 /// Note: The difference compared to `TypeSwitch` is that all
211 /// matching cases are applied instead of just the first match.
212 template <typename InterfaceT, typename... InterfacesU>
213 FailureOr<Operation *> handleOp(Operation *op) {
214 Operation *next = op;
215 if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
216 FailureOr<Operation *> result = handleInterface(concreteOp);
217 if (failed(Result: result))
218 return failure();
219 next = *result;
220 }
221 if (!next)
222 return FailureOr<Operation *>(nullptr);
223 return handleOp<InterfacesU...>(next);
224 }
225
226 /// Apply all supported interface handlers to the given op.
227 FailureOr<Operation *> handleAllInterfaces(Operation *op) {
228 if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
229 return deallocOpInterface.process(state, options);
230
231 if (failed(Result: verifyOperationPreconditions(op)))
232 return failure();
233
234 return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
235 CallOpInterface, BranchOpInterface,
236 RegionBranchTerminatorOpInterface>(op);
237 }
238
239 /// Make sure that for each forwarded MemRef value, an ownership indicator
240 /// `i1` value is forwarded as well such that the successor block knows
241 /// whether the MemRef has to be deallocated.
242 ///
243 /// Example:
244 /// ```
245 /// ^bb1:
246 /// <more ops...>
247 /// cf.br ^bb2(<forward-to-bb2>)
248 /// ```
249 /// becomes
250 /// ```
251 /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
252 /// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
253 /// ^bb1:
254 /// <more ops...>
255 /// o = bufferization.dealloc m if c retain r
256 /// // replace ownership(r) with o element-wise
257 /// cf.br ^bb2(<forward-to-bb2>, o)
258 /// ```
259 FailureOr<Operation *> handleInterface(BranchOpInterface op);
260
261 /// Add an ownership indicator for every forwarding MemRef operand and result.
262 /// Nested regions never take ownership of MemRefs owned by a parent region
263 /// (neither via forwarding operand nor when captured implicitly when the
264 /// region is not isolated from above). Ownerships will only be passed to peer
265 /// regions (when an operation has multiple regions, such as scf.while), or to
266 /// parent regions.
267 /// Note that the block arguments in the nested region are currently handled
268 /// centrally in the 'dealloc' function, but better interface support could
269 /// allow us to do this here for the nested region specifically to reduce the
270 /// amount of assumptions we make on the structure of ops implementing this
271 /// interface.
272 ///
273 /// Example:
274 /// ```
275 /// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
276 /// <more ops...>
277 /// scf.yield %m : memref<2xi32>, i1
278 /// }
279 /// ```
280 /// becomes
281 /// ```
282 /// %ret:2 = scf.for %i = %c0 to %c10 step %c1
283 /// iter_args(%m = %memref, %own = %false) {
284 /// <more ops...>
285 /// // Note that the scf.yield is handled by the
286 /// // RegionBranchTerminatorOpInterface (not this handler)
287 /// // let o = getMemrefWithUniqueOwnership(%own)
288 /// scf.yield %m, o : memref<2xi32>, i1
289 /// }
290 /// ```
291 FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);
292
293 /// If the private-function-dynamic-ownership pass option is enabled and the
294 /// called function is private, additional results are added for each MemRef
295 /// result to pass the dynamic ownership indicator along. Otherwise, updates
296 /// the ownership map and list of memrefs to be deallocated according to the
297 /// function boundary ABI, i.e., assume ownership of all returned MemRefs.
298 ///
299 /// Example (assume `private-function-dynamic-ownership` is enabled):
300 /// ```
301 /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
302 /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
303 ///
304 /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
305 /// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
306 /// ```
307 /// becomes
308 /// ```
309 /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
310 /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1) {...}
311 ///
312 /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
313 /// // set ownership(%ret_f) := true
314 /// // remember to deallocate %ret_f
315 ///
316 /// %ret_g:2 = func.call @g(%memref) : (memref<2xi32>) -> (memref<2xi32>, i1)
317 /// // set ownership(%ret_g#0) := %ret_g#1
318 /// // remember to deallocate %ret_g if it comes with ownership
319 /// ```
320 FailureOr<Operation *> handleInterface(CallOpInterface op);
321
322 /// Takes care of allocation and free side-effects. It collects allocated
323 /// MemRefs that we have to add to manually deallocate, but also removes
324 /// values again that are already deallocated before the end of the block. It
325 /// also updates the ownership map accordingly.
326 ///
327 /// Example:
328 /// ```
329 /// %alloc = memref.alloc()
330 /// %alloca = memref.alloca()
331 /// ```
332 /// becomes
333 /// ```
334 /// %alloc = memref.alloc()
335 /// %alloca = memref.alloca()
336 /// // set ownership(alloc) := true
337 /// // set ownership(alloca) := false
338 /// // remember to deallocate %alloc
339 /// ```
340 FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);
341
342 /// Takes care that the function boundary ABI is adhered to if the parent
343 /// operation implements FunctionOpInterface, inserting a
344 /// `bufferization.clone` if necessary, and inserts the
345 /// `bufferization.dealloc` operation according to the ops operands.
346 ///
347 /// Example:
348 /// ```
349 /// ^bb1:
350 /// <more ops...>
351 /// func.return <return-vals>
352 /// ```
353 /// becomes
354 /// ```
355 /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
356 /// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
357 /// ^bb1:
358 /// <more ops...>
359 /// o = bufferization.dealloc m if c retain r
360 /// func.return <return-vals>
361 /// (if !isFunctionWithoutDynamicOwnership: append o)
362 /// ```
363 FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);
364
365 /// Construct a new operation which is exactly the same as the passed 'op'
366 /// except that the OpResults list is appended by new results of the passed
367 /// 'types'.
368 /// TODO: ideally, this would be implemented using an OpInterface because it
369 /// is used to append function results, loop iter_args, etc. and thus makes
370 /// some assumptions that the variadic list of those is at the end of the
371 /// OpResults range.
372 Operation *appendOpResults(Operation *op, ArrayRef<Type> types);
373
374 /// A convenience template for the generic 'appendOpResults' function above to
375 /// avoid manual casting of the result.
376 template <typename OpTy>
377 OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
378 return cast<OpTy>(appendOpResults(op.getOperation(), types));
379 }
380
381 /// Performs deallocation of a single basic block. This is a private function
382 /// because some internal data structures have to be set up beforehand and
383 /// this function has to be called on blocks in a region in dominance order.
384 LogicalResult deallocate(Block *block);
385
386 /// After all relevant interfaces of an operation have been processed by the
387 /// 'handleInterface' functions, this function sets the ownership of operation
388 /// results that have not been set yet by the 'handleInterface' functions. It
389 /// generally assumes that each result can alias with every operand of the
390 /// operation, if there are MemRef typed results but no MemRef operands it
391 /// assigns 'false' as ownership. This happens, e.g., for the
392 /// memref.get_global operation. It would also be possible to query some alias
393 /// analysis to get more precise ownerships, however, the analysis would have
394 /// to be updated according to the IR modifications this pass performs (e.g.,
395 /// re-building operations to have more result values, inserting clone
396 /// operations, etc.).
397 void populateRemainingOwnerships(Operation *op);
398
399 /// Given an SSA value of MemRef type, returns the same of a new SSA value
400 /// which has 'Unique' ownership where the ownership indicator is guaranteed
401 /// to be always 'true'.
402 Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
403 Value memref, Block *block);
404
405 /// Returns whether the given operation implements FunctionOpInterface, has
406 /// private visibility, and the private-function-dynamic-ownership pass option
407 /// is enabled.
408 bool isFunctionWithoutDynamicOwnership(Operation *op);
409
410 /// Given an SSA value of MemRef type, this function queries the
411 /// BufferDeallocationOpInterface of the defining operation of 'memref' for a
412 /// materialized ownership indicator for 'memref'. If the op does not
413 /// implement the interface or if the block for which the materialized value
414 /// is requested does not match the block in which 'memref' is defined, the
415 /// default implementation in
416 /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
417 std::pair<Value, Value>
418 materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
419
420 /// Checks all the preconditions for operations implementing the
421 /// FunctionOpInterface that have to hold for the deallocation to be
422 /// applicable:
423 /// (1) Checks that there are not explicit control flow loops.
424 static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);
425
426 /// Checks all the preconditions for operations inside the region of
427 /// operations implementing the FunctionOpInterface that have to hold for the
428 /// deallocation to be applicable:
429 /// (1) Checks if all operations that have at least one attached region
430 /// implement the RegionBranchOpInterface. This is not required in edge cases,
431 /// where we have a single attached region and the parent operation has no
432 /// results.
433 /// (2) Checks that no deallocations already exist. Especially deallocations
434 /// in nested regions are not properly supported yet since this requires
435 /// ownership of the memref to be transferred to the nested region, which does
436 /// not happen by default. This constrained can be lifted in the future.
437 /// (3) Checks that terminators with more than one successor except
438 /// `cf.cond_br` are not present and that either BranchOpInterface or
439 /// RegionBranchTerminatorOpInterface is implemented.
440 static LogicalResult verifyOperationPreconditions(Operation *op);
441
442 /// When the 'private-function-dynamic-ownership' pass option is enabled,
443 /// additional `i1` return values are added for each MemRef result in the
444 /// function signature. This function takes care of updating the
445 /// `function_type` attribute of the function according to the actually
446 /// returned values from the terminators.
447 static LogicalResult updateFunctionSignature(FunctionOpInterface op);
448
449private:
450 /// Collects all analysis state and including liveness, caches, ownerships of
451 /// already processed values and operations, and the MemRefs that have to be
452 /// deallocated at the end of each block.
453 DeallocationState state;
454
455 /// Collects all pass options in a single place.
456 DeallocationOptions options;
457};
458
459} // namespace
460
461//===----------------------------------------------------------------------===//
462// BufferDeallocation Implementation
463//===----------------------------------------------------------------------===//
464
465std::pair<Value, Value>
466BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
467 Block *block) {
468 // The interface can only materialize ownership indicators in the same block
469 // as the defining op.
470 if (memref.getParentBlock() != block)
471 return state.getMemrefWithUniqueOwnership(builder, memref, block);
472
473 Operation *owner = memref.getDefiningOp();
474 if (!owner)
475 owner = memref.getParentBlock()->getParentOp();
476
477 // If the op implements the interface, query it for a materialized ownership
478 // value.
479 if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
480 return deallocOpInterface.materializeUniqueOwnershipForMemref(
481 state, options, builder, memref);
482
483 // Otherwise use the default implementation.
484 return state.getMemrefWithUniqueOwnership(builder, memref, block);
485}
486
487LogicalResult
488BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
489 // (1) Ensure that there are supported loops only (no explicit control flow
490 // loops).
491 Backedges backedges(op);
492 if (backedges.size()) {
493 op->emitError("Only structured control-flow loops are supported.");
494 return failure();
495 }
496
497 return success();
498}
499
500LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
501 // We do not care about ops that do not operate on buffers and have no
502 // Allocate/Free side effect.
503 if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
504 return success();
505
506 // (1) The pass does not work properly when deallocations are already present.
507 // Alternatively, we could also remove all deallocations as a pre-pass.
508 if (isa<DeallocOp>(op))
509 return op->emitError(
510 message: "No deallocation operations must be present when running this pass!");
511
512 // (2) Memory side effects of unregistered ops are unknown. In particular, we
513 // do not know whether an unregistered op allocates memory or not.
514 // - Ops with recursive memory effects are allowed. All nested ops in the
515 // regions of `op` will be analyzed separately.
516 // - Call ops are allowed even though they typically do not implement the
517 // MemoryEffectOpInterface. They usually do not have side effects apart
518 // from the callee, which will be analyzed separately. (This is similar to
519 // "recursive memory effects".)
520 if (!isa<MemoryEffectOpInterface>(op) &&
521 !op->hasTrait<OpTrait::HasRecursiveMemoryEffects>() &&
522 !isa<CallOpInterface>(op))
523 return op->emitError(
524 message: "ops with unknown memory side effects are not supported");
525
526 // (3) Check that the control flow structures are supported.
527 auto regions = op->getRegions();
528 // Check that if the operation has at
529 // least one region it implements the RegionBranchOpInterface. If there
530 // is an operation that does not fulfill this condition, we cannot apply
531 // the deallocation steps. Furthermore, we accept cases, where we have a
532 // region that returns no results, since, in that case, the intra-region
533 // control flow does not affect the transformation.
534 size_t size = regions.size();
535 if (((size == 1 && !op->getResults().empty()) || size > 1) &&
536 !dyn_cast<RegionBranchOpInterface>(op)) {
537 return op->emitError(message: "All operations with attached regions need to "
538 "implement the RegionBranchOpInterface.");
539 }
540
541 // (3) Check that terminators with more than one successor except `cf.cond_br`
542 // are not present and that either BranchOpInterface or
543 // RegionBranchTerminatorOpInterface is implemented.
544 if (op->hasTrait<OpTrait::NoTerminator>())
545 return op->emitError(message: "NoTerminator trait is not supported");
546
547 if (op->hasTrait<OpTrait::IsTerminator>()) {
548 // Either one of those interfaces has to be implemented on terminators, but
549 // not both.
550 if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
551 (isa<BranchOpInterface>(op) &&
552 isa<RegionBranchTerminatorOpInterface>(op)))
553
554 return op->emitError(
555 message: "Terminators must implement either BranchOpInterface or "
556 "RegionBranchTerminatorOpInterface (but not both)!");
557
558 // We only support terminators with 0 or 1 successors for now and
559 // special-case the conditional branch op.
560 if (op->getSuccessors().size() > 1)
561
562 return op->emitError(message: "Terminators with more than one successor "
563 "are not supported!");
564 }
565
566 return success();
567}
568
569LogicalResult
570BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
571 SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
572 op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
573 [](RegionBranchTerminatorOpInterface op) {
574 return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
575 }));
576 if (!llvm::all_equal(Range&: returnOperandTypes))
577 return op->emitError(
578 "there are multiple return operations with different operand types");
579
580 TypeRange resultTypes = op.getResultTypes();
581 // Check if we found a return operation because that doesn't necessarily
582 // always have to be the case, e.g., consider a function with one block that
583 // has a cf.br at the end branching to itself again (i.e., an infinite loop).
584 // In that case we don't want to crash but just not update the return types.
585 if (!returnOperandTypes.empty())
586 resultTypes = returnOperandTypes[0];
587
588 op.setFunctionTypeAttr(TypeAttr::get(FunctionType::get(
589 op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
590 resultTypes)));
591
592 return success();
593}
594
595LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
596 // Stop and emit a proper error message if we don't support the input IR.
597 if (failed(verifyFunctionPreconditions(op: op)))
598 return failure();
599
600 // Process the function block by block.
601 auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
602 [&](Block *block) {
603 if (failed(deallocate(block)))
604 return WalkResult::interrupt();
605 return WalkResult::advance();
606 });
607 if (result.wasInterrupted())
608 return failure();
609
610 // Update the function signature if the function is private, dynamic ownership
611 // is enabled, and the function has memrefs as arguments or results.
612 return updateFunctionSignature(op: op);
613}
614
615LogicalResult BufferDeallocation::deallocate(Block *block) {
616 OpBuilder builder = OpBuilder::atBlockBegin(block);
617
618 // Compute liveness transfers of ownership to this block.
619 SmallVector<Value> liveMemrefs;
620 state.getLiveMemrefsIn(block, memrefs&: liveMemrefs);
621 for (auto li : liveMemrefs) {
622 // Ownership of implicitly captured memrefs from other regions is never
623 // taken, but ownership of memrefs in the same region (but different block)
624 // is taken.
625 if (li.getParentRegion() == block->getParent()) {
626 state.updateOwnership(memref: li, ownership: state.getOwnership(memref: li, block: li.getParentBlock()),
627 block);
628 state.addMemrefToDeallocate(memref: li, block);
629 continue;
630 }
631
632 if (li.getParentRegion()->isProperAncestor(other: block->getParent())) {
633 Value falseVal = buildBoolValue(builder, loc: li.getLoc(), value: false);
634 state.updateOwnership(memref: li, ownership: falseVal, block);
635 }
636 }
637
638 for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
639 BlockArgument arg = block->getArgument(i);
640 if (!isMemref(v: arg))
641 continue;
642
643 // Adhere to function boundary ABI: no ownership of function argument
644 // MemRefs is taken.
645 if (isa<FunctionOpInterface>(Val: block->getParentOp()) &&
646 block->isEntryBlock()) {
647 Value newArg = buildBoolValue(builder, loc: arg.getLoc(), value: false);
648 state.updateOwnership(memref: arg, ownership: newArg);
649 state.addMemrefToDeallocate(memref: arg, block);
650 continue;
651 }
652
653 // Pass MemRef ownerships along via `i1` values.
654 Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
655 state.updateOwnership(memref: arg, ownership: newArg);
656 state.addMemrefToDeallocate(memref: arg, block);
657 }
658
659 // For each operation in the block, handle the interfaces that affect aliasing
660 // and ownership of memrefs.
661 for (Operation &op : llvm::make_early_inc_range(Range&: *block)) {
662 FailureOr<Operation *> result = handleAllInterfaces(op: &op);
663 if (failed(Result: result))
664 return failure();
665 if (!*result)
666 continue;
667
668 populateRemainingOwnerships(op: *result);
669 }
670
671 // TODO: if block has no terminator, handle dealloc insertion here.
672 return success();
673}
674
675Operation *BufferDeallocation::appendOpResults(Operation *op,
676 ArrayRef<Type> types) {
677 SmallVector<Type> newTypes(op->getResultTypes());
678 newTypes.append(in_start: types.begin(), in_end: types.end());
679 auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
680 op->getOperands(), op->getAttrDictionary(),
681 op->getPropertiesStorage(),
682 op->getSuccessors(), op->getNumRegions());
683 for (auto [oldRegion, newRegion] :
684 llvm::zip(op->getRegions(), newOp->getRegions()))
685 newRegion.takeBody(oldRegion);
686
687 OpBuilder(op).insert(op: newOp);
688 op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
689 op->erase();
690
691 return newOp;
692}
693
694FailureOr<Operation *>
695BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
696 OpBuilder builder = OpBuilder::atBlockBegin(block: op->getBlock());
697
698 // TODO: the RegionBranchOpInterface does not provide all the necessary
699 // methods to perform this transformation without additional assumptions on
700 // the structure. In particular, that
701 // * additional values to be passed to the next region can be added to the end
702 // of the operand list, the end of the block argument list, and the end of
703 // the result value list. However, it seems to be the general guideline for
704 // operations implementing this interface to follow this structure.
705 // * and that the block arguments and result values match the forwarded
706 // operands one-to-one (i.e., that there are no other values appended to the
707 // front).
708 // These assumptions are satisfied by the `scf.if`, `scf.for`, and `scf.while`
709 // operations.
710
711 SmallVector<RegionSuccessor> regions;
712 op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
713 assert(!regions.empty() && "Must have at least one successor region");
714 SmallVector<Value> entryOperands(
715 op.getEntrySuccessorOperands(regions.front()));
716 unsigned numMemrefOperands = llvm::count_if(Range&: entryOperands, P: isMemref);
717
718 // No ownership is acquired for any MemRefs that are passed to the region from
719 // the outside.
720 Value falseVal = buildBoolValue(builder, op.getLoc(), false);
721 op->insertOperands(op->getNumOperands(),
722 SmallVector<Value>(numMemrefOperands, falseVal));
723
724 int counter = op->getNumResults();
725 unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
726 SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
727 RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);
728
729 for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
730 state.updateOwnership(result, newOp->getResult(counter++));
731 state.addMemrefToDeallocate(result, newOp->getBlock());
732 }
733
734 return newOp.getOperation();
735}
736
737Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
738 OpBuilder &builder, Value memref, Block *block) {
739 // First, make sure we at least have 'Unique' ownership already.
740 std::pair<Value, Value> newMemrefAndOnwership =
741 materializeUniqueOwnership(builder, memref, block);
742 Value newMemref = newMemrefAndOnwership.first;
743 Value condition = newMemrefAndOnwership.second;
744
745 // Avoid inserting additional IR if ownership is already guaranteed. In
746 // particular, this is already the case when we had 'Unknown' ownership
747 // initially and a clone was inserted to get to 'Unique' ownership.
748 if (matchPattern(value: condition, pattern: m_One()))
749 return newMemref;
750
751 // Insert a runtime check and only clone if we still don't have ownership at
752 // runtime.
753 Value maybeClone =
754 builder
755 .create<scf::IfOp>(
756 memref.getLoc(), condition,
757 [&](OpBuilder &builder, Location loc) {
758 builder.create<scf::YieldOp>(loc, newMemref);
759 },
760 [&](OpBuilder &builder, Location loc) {
761 Value clone =
762 builder.create<bufferization::CloneOp>(loc, newMemref);
763 builder.create<scf::YieldOp>(loc, clone);
764 })
765 .getResult(0);
766 Value trueVal = buildBoolValue(builder, loc: memref.getLoc(), value: true);
767 state.updateOwnership(memref: maybeClone, ownership: trueVal);
768 state.addMemrefToDeallocate(memref: maybeClone, block: maybeClone.getParentBlock());
769 return maybeClone;
770}
771
772FailureOr<Operation *>
773BufferDeallocation::handleInterface(BranchOpInterface op) {
774 if (op->getNumSuccessors() > 1)
775 return op->emitError("BranchOpInterface operations with multiple "
776 "successors are not supported yet");
777
778 if (op->getNumSuccessors() != 1)
779 return emitError(op.getLoc(),
780 "only BranchOpInterface operations with exactly "
781 "one successor are supported yet");
782
783 if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
784 return op.emitError("produced operands are not supported");
785
786 // Collect the values to deallocate and retain and use them to create the
787 // dealloc operation.
788 Block *block = op->getBlock();
789 OpBuilder builder(op);
790 SmallVector<Value> memrefs, conditions, toRetain;
791 if (failed(state.getMemrefsAndConditionsToDeallocate(
792 builder, loc: op.getLoc(), block, memrefs, conditions)))
793 return failure();
794
795 OperandRange forwardedOperands =
796 op.getSuccessorOperands(0).getForwardedOperands();
797 state.getMemrefsToRetain(fromBlock: block, toBlock: op->getSuccessor(0), destOperands: forwardedOperands,
798 toRetain);
799
800 auto deallocOp = builder.create<bufferization::DeallocOp>(
801 op.getLoc(), memrefs, conditions, toRetain);
802
803 // We want to replace the current ownership of the retained values with the
804 // result values of the dealloc operation as they are always unique.
805 state.resetOwnerships(memrefs: deallocOp.getRetained(), block);
806 for (auto [retained, ownership] :
807 llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
808 state.updateOwnership(retained, ownership, block);
809 }
810
811 unsigned numAdditionalReturns = llvm::count_if(Range&: forwardedOperands, P: isMemref);
812 SmallVector<Value> newOperands(forwardedOperands);
813 auto additionalConditions =
814 deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
815 newOperands.append(additionalConditions.begin(), additionalConditions.end());
816 op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);
817
818 return op.getOperation();
819}
820
821FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
822 OpBuilder builder(op);
823
824 // Lookup the function operation and check if it has private visibility. If
825 // the function is referenced by SSA value instead of a Symbol, it's assumed
826 // to be public. (And we cannot easily change the type of the SSA value
827 // anyway.)
828 Operation *funcOp = op.resolveCallableInTable(state.getSymbolTable());
829 bool isPrivate = false;
830 if (auto symbol = dyn_cast_or_null<SymbolOpInterface>(funcOp))
831 isPrivate = symbol.isPrivate() && !symbol.isDeclaration();
832
833 // If the private-function-dynamic-ownership option is enabled and we are
834 // calling a private function, we need to add an additional `i1` result for
835 // each MemRef result to dynamically pass the current ownership indicator
836 // rather than adhering to the function boundary ABI.
837 if (options.privateFuncDynamicOwnership && isPrivate) {
838 unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
839 SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
840 unsigned ownershipCounter = op->getNumResults();
841 op = appendOpResults(op, ownershipTypesToAppend);
842
843 for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
844 state.updateOwnership(result, op->getResult(ownershipCounter++));
845 state.addMemrefToDeallocate(result, result.getParentBlock());
846 }
847
848 return op.getOperation();
849 }
850
851 // According to the function boundary ABI we are guaranteed to get ownership
852 // of all MemRefs returned by the function. Thus we set ownership to constant
853 // 'true' and remember to deallocate it.
854 Value trueVal = buildBoolValue(builder, op.getLoc(), true);
855 for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
856 state.updateOwnership(result, trueVal);
857 state.addMemrefToDeallocate(result, result.getParentBlock());
858 }
859
860 return op.getOperation();
861}
862
863FailureOr<Operation *>
864BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
865 auto *block = op->getBlock();
866 OpBuilder builder = OpBuilder::atBlockBegin(block: block);
867
868 for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
869 if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
870 // The bufferization.manual_deallocation attribute can be attached to ops
871 // with an allocation and/or deallocation side effect. It indicates that
872 // the op is under a "manual deallocation" scheme. Deallocation ops are
873 // usually forbidden in the input IR (not supported by the buffer
874 // deallocation pass). However, if they are under manual deallocation,
875 // they can be safely ignored by the buffer deallocation pass.
876 if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
877 return op->emitError(
878 "memory free side-effect on MemRef value not supported!");
879
880 // Buffers that were allocated under "manual deallocation" may be
881 // manually deallocated. We insert a runtime assertion to cover certain
882 // cases of invalid IR where an automatically managed buffer allocation
883 // is manually deallocated. This is not a bulletproof check!
884 OpBuilder::InsertionGuard g(builder);
885 builder.setInsertionPoint(op);
886 Ownership ownership = state.getOwnership(operand, block);
887 if (ownership.isUnique()) {
888 Value ownershipInverted = builder.create<arith::XOrIOp>(
889 op.getLoc(), ownership.getIndicator(),
890 buildBoolValue(builder, op.getLoc(), true));
891 builder.create<cf::AssertOp>(
892 op.getLoc(), ownershipInverted,
893 "expected that the block does not have ownership");
894 }
895 }
896 }
897
898 for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
899 auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
900 if (allocEffect.has_value()) {
901 if (isa<SideEffects::AutomaticAllocationScopeResource>(
902 allocEffect->getResource())) {
903 // Make sure that the ownership of auto-managed allocations is set to
904 // false. This is important for operations that have at least one memref
905 // typed operand. E.g., consider an operation like `bufferization.clone`
906 // that lowers to a `memref.alloca + memref.copy` instead of a
907 // `memref.alloc`. If we wouldn't set the ownership of the result here,
908 // the default ownership population in `populateRemainingOwnerships`
909 // would assume aliasing with the MemRef operand.
910 state.resetOwnerships(res, block);
911 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
912 continue;
913 }
914
915 if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
916 // This allocation will be deallocated manually. Assign an ownership of
917 // "false", so that it will never be deallocated by the buffer
918 // deallocation pass.
919 state.resetOwnerships(res, block);
920 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
921 continue;
922 }
923
924 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
925 state.addMemrefToDeallocate(res, block);
926 }
927 }
928
929 return op.getOperation();
930}
931
932FailureOr<Operation *>
933BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
934 OpBuilder builder(op);
935
936 // If this is a return operation of a function that is not private or the
937 // dynamic function boundary ownership is disabled, we need to return memref
938 // values for which we have guaranteed ownership to pass on to adhere to the
939 // function boundary ABI.
940 bool funcWithoutDynamicOwnership =
941 isFunctionWithoutDynamicOwnership(op: op->getParentOp());
942 if (funcWithoutDynamicOwnership) {
943 for (OpOperand &val : op->getOpOperands()) {
944 if (!isMemref(val.get()))
945 continue;
946
947 val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
948 op->getBlock()));
949 }
950 }
951
952 // TODO: getSuccessorRegions is not implemented by all operations we care
953 // about, but we would need to check how many successors there are and under
954 // which condition they are taken, etc.
955
956 MutableOperandRange operands =
957 op.getMutableSuccessorOperands(RegionBranchPoint::parent());
958
959 SmallVector<Value> updatedOwnerships;
960 auto result = deallocation_impl::insertDeallocOpForReturnLike(
961 state, op: op, operands: operands.getAsOperandRange(), updatedOperandOwnerships&: updatedOwnerships);
962 if (failed(result) || !*result)
963 return result;
964
965 // Add an additional operand for every MemRef for the ownership indicator.
966 if (!funcWithoutDynamicOwnership) {
967 SmallVector<Value> newOperands{operands.getAsOperandRange()};
968 newOperands.append(in_start: updatedOwnerships.begin(), in_end: updatedOwnerships.end());
969 operands.assign(values: newOperands);
970 }
971
972 return op.getOperation();
973}
974
975bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
976 auto funcOp = dyn_cast<FunctionOpInterface>(op);
977 return funcOp && (!options.privateFuncDynamicOwnership ||
978 !funcOp.isPrivate() || funcOp.isExternal());
979}
980
981void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
982 for (auto res : op->getResults()) {
983 if (!isMemref(v: res))
984 continue;
985 if (!state.getOwnership(memref: res, block: op->getBlock()).isUninitialized())
986 continue;
987
988 // The op does not allocate memory, otherwise, it would have been assigned
989 // an ownership during `handleInterface`. Assume the result may alias with
990 // any memref operand and thus combine all their ownerships.
991 for (auto operand : op->getOperands()) {
992 if (!isMemref(v: operand))
993 continue;
994
995 state.updateOwnership(
996 memref: res, ownership: state.getOwnership(memref: operand, block: operand.getParentBlock()),
997 block: op->getBlock());
998 }
999
1000 // If the ownership value is still uninitialized (e.g., because the op has
1001 // no memref operands), assume that no ownership is taken. E.g., this is the
1002 // case for "memref.get_global".
1003 //
1004 // Note: This can lead to memory leaks if memory side effects are not
1005 // properly specified on the op.
1006 if (state.getOwnership(memref: res, block: op->getBlock()).isUninitialized()) {
1007 OpBuilder builder(op);
1008 state.updateOwnership(memref: res, ownership: buildBoolValue(builder, loc: op->getLoc(), value: false));
1009 }
1010 }
1011}
1012
1013//===----------------------------------------------------------------------===//
1014// OwnershipBasedBufferDeallocationPass
1015//===----------------------------------------------------------------------===//
1016
1017namespace {
1018
1019/// The actual buffer deallocation pass that inserts and moves dealloc nodes
1020/// into the right positions. Furthermore, it inserts additional clones if
1021/// necessary. It uses the algorithm described at the top of the file.
1022struct OwnershipBasedBufferDeallocationPass
1023 : public bufferization::impl::OwnershipBasedBufferDeallocationPassBase<
1024 OwnershipBasedBufferDeallocationPass> {
1025 using Base::Base;
1026
1027 void runOnOperation() override {
1028 DeallocationOptions options;
1029 options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;
1030
1031 mlir::SymbolTableCollection symbolTables;
1032
1033 auto status = getOperation()->walk([&](func::FuncOp func) {
1034 if (func.isExternal())
1035 return WalkResult::skip();
1036
1037 if (failed(deallocateBuffersOwnershipBased(func, options, symbolTables)))
1038 return WalkResult::interrupt();
1039
1040 return WalkResult::advance();
1041 });
1042 if (status.wasInterrupted())
1043 signalPassFailure();
1044 }
1045};
1046
1047} // namespace
1048
1049//===----------------------------------------------------------------------===//
1050// Implement bufferization API
1051//===----------------------------------------------------------------------===//
1052
1053LogicalResult bufferization::deallocateBuffersOwnershipBased(
1054 FunctionOpInterface op, DeallocationOptions options,
1055 SymbolTableCollection &symbolTables) {
1056 // Gather all required allocation nodes and prepare the deallocation phase.
1057 BufferDeallocation deallocation(op, options, symbolTables);
1058
1059 // Place all required temporary clone and dealloc nodes.
1060 return deallocation.deallocate(op);
1061}
1062

source code of mlir/lib/Dialect/Bufferization/Transforms/OwnershipBasedBufferDeallocation.cpp