//===- OwnershipBasedBufferDeallocation.cpp - impl. for buffer dealloc. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements logic for computing correct `bufferization.dealloc`
// positions. Furthermore, buffer deallocation also adds required new clone
// operations to ensure that memrefs returned by functions never alias an
// argument.
//
// TODO:
// The current implementation does not support explicit-control-flow loops and
// the resulting code will be invalid with respect to program semantics.
// However, structured control-flow loops are fully supported.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Bufferization/IR/BufferDeallocationOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Iterators.h"
#include "mlir/IR/Matchers.h"
#include "mlir/Interfaces/ControlFlowInterfaces.h"

namespace mlir {
namespace bufferization {
#define GEN_PASS_DEF_OWNERSHIPBASEDBUFFERDEALLOCATION
#include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc"
} // namespace bufferization
} // namespace mlir

using namespace mlir;
using namespace mlir::bufferization;

//===----------------------------------------------------------------------===//
// Helpers
//===----------------------------------------------------------------------===//

static Value buildBoolValue(OpBuilder &builder, Location loc, bool value) {
  return builder.create<arith::ConstantOp>(loc, builder.getBoolAttr(value));
}

static bool isMemref(Value v) { return isa<BaseMemRefType>(v.getType()); }

/// Return "true" if the given op is guaranteed to have neither "Allocate" nor
/// "Free" side effects.
static bool hasNeitherAllocateNorFreeSideEffect(Operation *op) {
  if (isa<MemoryEffectOpInterface>(op))
    return !hasEffect<MemoryEffects::Allocate>(op) &&
           !hasEffect<MemoryEffects::Free>(op);
  // If the op does not implement the MemoryEffectOpInterface but has recursive
  // memory effects, then this op in isolation (without its body) does not have
  // any side effects. All the ops inside the regions of this op will be
  // processed separately.
  return op->hasTrait<OpTrait::HasRecursiveMemoryEffects>();
}

/// Return "true" if the given op has buffer semantics. I.e., it has buffer
/// operands, buffer results and/or buffer region entry block arguments.
static bool hasBufferSemantics(Operation *op) {
  if (llvm::any_of(op->getOperands(), isMemref) ||
      llvm::any_of(op->getResults(), isMemref))
    return true;
  for (Region &region : op->getRegions())
    if (!region.empty())
      if (llvm::any_of(region.front().getArguments(), isMemref))
        return true;
  return false;
}
76
77//===----------------------------------------------------------------------===//
78// Backedges analysis
79//===----------------------------------------------------------------------===//
80
81namespace {
82
83/// A straight-forward program analysis which detects loop backedges induced by
84/// explicit control flow.
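///
/// For example (an illustrative CFG sketch, not taken from a test case): in
///
///   ^bb1:
///     cf.br ^bb2
///   ^bb2:
///     cf.cond_br %c, ^bb1, ^bb3
///
/// the analysis visits ^bb1 and then ^bb2 while ^bb1 is still on the
/// processing stack, so the edge (^bb2, ^bb1) is recorded as a backedge.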
85class Backedges {
86public:
87 using BlockSetT = SmallPtrSet<Block *, 16>;
88 using BackedgeSetT = llvm::DenseSet<std::pair<Block *, Block *>>;
89
90public:
91 /// Constructs a new backedges analysis using the op provided.
92 Backedges(Operation *op) { recurse(op); }
93
94 /// Returns the number of backedges formed by explicit control flow.
95 size_t size() const { return edgeSet.size(); }
96
97 /// Returns the start iterator to loop over all backedges.
98 BackedgeSetT::const_iterator begin() const { return edgeSet.begin(); }
99
100 /// Returns the end iterator to loop over all backedges.
101 BackedgeSetT::const_iterator end() const { return edgeSet.end(); }
102
103private:
104 /// Enters the current block and inserts a backedge into the `edgeSet` if we
105 /// have already visited the current block. The inserted edge links the given
106 /// `predecessor` with the `current` block.
  bool enter(Block &current, Block *predecessor) {
    bool inserted = visited.insert(&current).second;
    if (!inserted)
      edgeSet.insert(std::make_pair(predecessor, &current));
    return inserted;
  }

  /// Leaves the current block.
  void exit(Block &current) { visited.erase(&current); }

  /// Recurses into the given operation while taking all attached regions into
  /// account.
  void recurse(Operation *op) {
    Block *current = op->getBlock();
    // If the current op implements the `BranchOpInterface`, there can be
    // cycles in the scope of all successor blocks.
    if (isa<BranchOpInterface>(op)) {
      for (Block *succ : current->getSuccessors())
        recurse(*succ, current);
    }
    // Recurse into all distinct regions and check for explicit control-flow
    // loops.
    for (Region &region : op->getRegions()) {
      if (!region.empty())
        recurse(region.front(), current);
    }
  }

  /// Recurses into explicit control-flow structures that are given by
  /// the successor relation defined on the block level.
  void recurse(Block &block, Block *predecessor) {
    // Try to enter the current block. If this is not possible, we are
    // currently processing this block and can safely return here.
    if (!enter(block, predecessor))
      return;

    // Recurse into all operations and successor blocks.
    for (Operation &op : block.getOperations())
      recurse(&op);

    // Leave the current block.
    exit(block);
  }
150
151 /// Stores all blocks that are currently visited and on the processing stack.
152 BlockSetT visited;
153
154 /// Stores all backedges in the format (source, target).
155 BackedgeSetT edgeSet;
156};
157
158} // namespace
159
160//===----------------------------------------------------------------------===//
161// BufferDeallocation
162//===----------------------------------------------------------------------===//
163
164namespace {
165/// The buffer deallocation transformation which ensures that all allocs in the
166/// program have a corresponding de-allocation.
167class BufferDeallocation {
168public:
169 BufferDeallocation(Operation *op, DeallocationOptions options)
170 : state(op), options(options) {}
171
172 /// Performs the actual placement/creation of all dealloc operations.
173 LogicalResult deallocate(FunctionOpInterface op);
174
175private:
176 /// The base case for the recursive template below.
177 template <typename... T>
178 typename std::enable_if<sizeof...(T) == 0, FailureOr<Operation *>>::type
179 handleOp(Operation *op) {
180 return op;
181 }
182
183 /// Applies all the handlers of the interfaces in the template list
184 /// implemented by 'op'. In particular, if an operation implements more than
185 /// one of the interfaces in the template list, all the associated handlers
186 /// will be applied to the operation in the same order as the template list
187 /// specifies. If a handler reports a failure or removes the operation without
188 /// replacement (indicated by returning 'nullptr'), no further handlers are
189 /// applied and the return value is propagated to the caller of 'handleOp'.
190 ///
  /// The interface handlers' job is to update the deallocation state, most
  /// importantly the ownership map and the list of MemRefs to potentially be
  /// deallocated per block, but also to insert `bufferization.dealloc`
  /// operations where needed. No MemRef that may still be used at a later
  /// point in the control flow may be deallocated, and the ownership map has
  /// to be updated to reflect ownership changes caused by an inserted dealloc
  /// operation (e.g., if two interfaces on the same op each insert a dealloc
  /// operation, the second one should query the ownership map and use the
  /// updated ownership indicators as deallocation conditions so that MemRefs
  /// already deallocated by the first dealloc operation are not deallocated a
  /// second time (double-free)). Note that currently only the interfaces on
  /// terminators may insert dealloc operations, and it is verified as a
  /// precondition that a terminator op implements exactly one of the
  /// interfaces handling dealloc insertion.
204 ///
205 /// The return value of the 'handleInterface' functions should be a
206 /// FailureOr<Operation *> indicating whether there was a failure or otherwise
207 /// returning the operation itself or a replacement operation.
208 ///
209 /// Note: The difference compared to `TypeSwitch` is that all
210 /// matching cases are applied instead of just the first match.
211 template <typename InterfaceT, typename... InterfacesU>
212 FailureOr<Operation *> handleOp(Operation *op) {
213 Operation *next = op;
214 if (auto concreteOp = dyn_cast<InterfaceT>(op)) {
215 FailureOr<Operation *> result = handleInterface(concreteOp);
216 if (failed(result))
217 return failure();
218 next = *result;
219 }
220 if (!next)
221 return FailureOr<Operation *>(nullptr);
222 return handleOp<InterfacesU...>(next);
223 }
224
225 /// Apply all supported interface handlers to the given op.
226 FailureOr<Operation *> handleAllInterfaces(Operation *op) {
227 if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(op))
228 return deallocOpInterface.process(state, options);
229
    if (failed(verifyOperationPreconditions(op)))
231 return failure();
232
233 return handleOp<MemoryEffectOpInterface, RegionBranchOpInterface,
234 CallOpInterface, BranchOpInterface,
235 RegionBranchTerminatorOpInterface>(op);
236 }
237
238 /// Make sure that for each forwarded MemRef value, an ownership indicator
239 /// `i1` value is forwarded as well such that the successor block knows
240 /// whether the MemRef has to be deallocated.
241 ///
242 /// Example:
243 /// ```
244 /// ^bb1:
245 /// <more ops...>
246 /// cf.br ^bb2(<forward-to-bb2>)
247 /// ```
248 /// becomes
249 /// ```
250 /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
251 /// // let r = getMemrefsToRetain(bb1, bb2, <forward-to-bb2>)
252 /// ^bb1:
253 /// <more ops...>
254 /// o = bufferization.dealloc m if c retain r
255 /// // replace ownership(r) with o element-wise
256 /// cf.br ^bb2(<forward-to-bb2>, o)
257 /// ```
258 FailureOr<Operation *> handleInterface(BranchOpInterface op);
259
260 /// Add an ownership indicator for every forwarding MemRef operand and result.
261 /// Nested regions never take ownership of MemRefs owned by a parent region
262 /// (neither via forwarding operand nor when captured implicitly when the
263 /// region is not isolated from above). Ownerships will only be passed to peer
264 /// regions (when an operation has multiple regions, such as scf.while), or to
265 /// parent regions.
266 /// Note that the block arguments in the nested region are currently handled
267 /// centrally in the 'dealloc' function, but better interface support could
268 /// allow us to do this here for the nested region specifically to reduce the
269 /// amount of assumptions we make on the structure of ops implementing this
270 /// interface.
271 ///
272 /// Example:
273 /// ```
274 /// %ret = scf.for %i = %c0 to %c10 step %c1 iter_args(%m = %memref) {
275 /// <more ops...>
276 /// scf.yield %m : memref<2xi32>, i1
277 /// }
278 /// ```
279 /// becomes
280 /// ```
281 /// %ret:2 = scf.for %i = %c0 to %c10 step %c1
282 /// iter_args(%m = %memref, %own = %false) {
283 /// <more ops...>
284 /// // Note that the scf.yield is handled by the
285 /// // RegionBranchTerminatorOpInterface (not this handler)
286 /// // let o = getMemrefWithUniqueOwnership(%own)
287 /// scf.yield %m, o : memref<2xi32>, i1
288 /// }
289 /// ```
290 FailureOr<Operation *> handleInterface(RegionBranchOpInterface op);
291
292 /// If the private-function-dynamic-ownership pass option is enabled and the
293 /// called function is private, additional results are added for each MemRef
294 /// result to pass the dynamic ownership indicator along. Otherwise, updates
295 /// the ownership map and list of memrefs to be deallocated according to the
296 /// function boundary ABI, i.e., assume ownership of all returned MemRefs.
297 ///
298 /// Example (assume `private-function-dynamic-ownership` is enabled):
299 /// ```
300 /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
301 /// func.func private @g(%arg0: memref<2xi32>) -> memref<2xi32> {...}
302 ///
303 /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
304 /// %ret_g = func.call @g(%memref) : (memref<2xi32>) -> memref<2xi32>
305 /// ```
306 /// becomes
307 /// ```
308 /// func.func @f(%arg0: memref<2xi32>) -> memref<2xi32> {...}
309 /// func.func private @g(%arg0: memref<2xi32>) -> (memref<2xi32>, i1) {...}
310 ///
311 /// %ret_f = func.call @f(%memref) : (memref<2xi32>) -> memref<2xi32>
312 /// // set ownership(%ret_f) := true
313 /// // remember to deallocate %ret_f
314 ///
315 /// %ret_g:2 = func.call @g(%memref) : (memref<2xi32>) -> (memref<2xi32>, i1)
316 /// // set ownership(%ret_g#0) := %ret_g#1
317 /// // remember to deallocate %ret_g if it comes with ownership
318 /// ```
319 FailureOr<Operation *> handleInterface(CallOpInterface op);
320
  /// Takes care of allocation and free side effects. It adds allocated MemRefs
  /// to the list of MemRefs to be deallocated, but also removes values again
  /// that are already deallocated before the end of the block. It also updates
  /// the ownership map accordingly.
325 ///
326 /// Example:
327 /// ```
328 /// %alloc = memref.alloc()
329 /// %alloca = memref.alloca()
330 /// ```
331 /// becomes
332 /// ```
333 /// %alloc = memref.alloc()
334 /// %alloca = memref.alloca()
335 /// // set ownership(alloc) := true
336 /// // set ownership(alloca) := false
337 /// // remember to deallocate %alloc
338 /// ```
339 FailureOr<Operation *> handleInterface(MemoryEffectOpInterface op);
340
341 /// Takes care that the function boundary ABI is adhered to if the parent
342 /// operation implements FunctionOpInterface, inserting a
343 /// `bufferization.clone` if necessary, and inserts the
  /// `bufferization.dealloc` operation according to the op's operands.
345 ///
346 /// Example:
347 /// ```
348 /// ^bb1:
349 /// <more ops...>
350 /// func.return <return-vals>
351 /// ```
352 /// becomes
353 /// ```
354 /// // let (m, c) = getMemrefsAndConditionsToDeallocate(bb1)
355 /// // let r = getMemrefsToRetain(bb1, nullptr, <return-vals>)
356 /// ^bb1:
357 /// <more ops...>
358 /// o = bufferization.dealloc m if c retain r
359 /// func.return <return-vals>
360 /// (if !isFunctionWithoutDynamicOwnership: append o)
361 /// ```
362 FailureOr<Operation *> handleInterface(RegionBranchTerminatorOpInterface op);
363
364 /// Construct a new operation which is exactly the same as the passed 'op'
365 /// except that the OpResults list is appended by new results of the passed
366 /// 'types'.
367 /// TODO: ideally, this would be implemented using an OpInterface because it
368 /// is used to append function results, loop iter_args, etc. and thus makes
369 /// some assumptions that the variadic list of those is at the end of the
370 /// OpResults range.
371 Operation *appendOpResults(Operation *op, ArrayRef<Type> types);
372
373 /// A convenience template for the generic 'appendOpResults' function above to
374 /// avoid manual casting of the result.
375 template <typename OpTy>
376 OpTy appendOpResults(OpTy op, ArrayRef<Type> types) {
377 return cast<OpTy>(appendOpResults(op.getOperation(), types));
378 }
379
380 /// Performs deallocation of a single basic block. This is a private function
381 /// because some internal data structures have to be set up beforehand and
382 /// this function has to be called on blocks in a region in dominance order.
383 LogicalResult deallocate(Block *block);
384
385 /// After all relevant interfaces of an operation have been processed by the
386 /// 'handleInterface' functions, this function sets the ownership of operation
387 /// results that have not been set yet by the 'handleInterface' functions. It
  /// generally assumes that each result may alias with every operand of the
  /// operation; if there are MemRef-typed results but no MemRef operands, it
  /// assigns 'false' as ownership. This happens, e.g., for the
391 /// memref.get_global operation. It would also be possible to query some alias
392 /// analysis to get more precise ownerships, however, the analysis would have
393 /// to be updated according to the IR modifications this pass performs (e.g.,
394 /// re-building operations to have more result values, inserting clone
395 /// operations, etc.).
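  ///
  /// For example (illustrative IR, not taken from a test case):
  ///   %0 = memref.get_global @some_global : memref<2xi32>
  /// has a MemRef result but no MemRef operands, so `%0` is assigned the
  /// constant ownership indicator 'false'.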
396 void populateRemainingOwnerships(Operation *op);
397
  /// Given an SSA value of MemRef type, returns the same or a new SSA value
  /// which has 'Unique' ownership where the ownership indicator is guaranteed
  /// to always be 'true'.
401 Value materializeMemrefWithGuaranteedOwnership(OpBuilder &builder,
402 Value memref, Block *block);
403
  /// Returns "true" if the given operation implements FunctionOpInterface and
  /// does not use dynamic ownership for its results, i.e., the
  /// private-function-dynamic-ownership pass option is disabled, the function
  /// is not private, or the function is external.
407 bool isFunctionWithoutDynamicOwnership(Operation *op);
408
409 /// Given an SSA value of MemRef type, this function queries the
410 /// BufferDeallocationOpInterface of the defining operation of 'memref' for a
411 /// materialized ownership indicator for 'memref'. If the op does not
412 /// implement the interface or if the block for which the materialized value
413 /// is requested does not match the block in which 'memref' is defined, the
414 /// default implementation in
415 /// `DeallocationState::getMemrefWithUniqueOwnership` is queried instead.
416 std::pair<Value, Value>
417 materializeUniqueOwnership(OpBuilder &builder, Value memref, Block *block);
418
419 /// Checks all the preconditions for operations implementing the
420 /// FunctionOpInterface that have to hold for the deallocation to be
421 /// applicable:
  /// (1) Checks that there are no explicit control flow loops.
423 static LogicalResult verifyFunctionPreconditions(FunctionOpInterface op);
424
425 /// Checks all the preconditions for operations inside the region of
426 /// operations implementing the FunctionOpInterface that have to hold for the
427 /// deallocation to be applicable:
428 /// (1) Checks if all operations that have at least one attached region
429 /// implement the RegionBranchOpInterface. This is not required in edge cases,
430 /// where we have a single attached region and the parent operation has no
431 /// results.
432 /// (2) Checks that no deallocations already exist. Especially deallocations
433 /// in nested regions are not properly supported yet since this requires
434 /// ownership of the memref to be transferred to the nested region, which does
  /// not happen by default. This constraint can be lifted in the future.
436 /// (3) Checks that terminators with more than one successor except
437 /// `cf.cond_br` are not present and that either BranchOpInterface or
438 /// RegionBranchTerminatorOpInterface is implemented.
439 static LogicalResult verifyOperationPreconditions(Operation *op);
440
441 /// When the 'private-function-dynamic-ownership' pass option is enabled,
442 /// additional `i1` return values are added for each MemRef result in the
443 /// function signature. This function takes care of updating the
444 /// `function_type` attribute of the function according to the actually
445 /// returned values from the terminators.
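  ///
  /// For example (a sketch, assuming dynamic ownership applies to the
  /// function): if the terminators now additionally return one `i1` per MemRef
  /// result, a `function_type` of `(memref<2xi32>) -> memref<2xi32>` is
  /// updated to `(memref<2xi32>) -> (memref<2xi32>, i1)`.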
446 static LogicalResult updateFunctionSignature(FunctionOpInterface op);
447
448private:
  /// Collects all analysis state, including liveness, caches, ownerships of
450 /// already processed values and operations, and the MemRefs that have to be
451 /// deallocated at the end of each block.
452 DeallocationState state;
453
454 /// Collects all pass options in a single place.
455 DeallocationOptions options;
456};
457
458} // namespace
459
460//===----------------------------------------------------------------------===//
461// BufferDeallocation Implementation
462//===----------------------------------------------------------------------===//
463
464std::pair<Value, Value>
465BufferDeallocation::materializeUniqueOwnership(OpBuilder &builder, Value memref,
466 Block *block) {
467 // The interface can only materialize ownership indicators in the same block
468 // as the defining op.
469 if (memref.getParentBlock() != block)
470 return state.getMemrefWithUniqueOwnership(builder, memref, block);
471
472 Operation *owner = memref.getDefiningOp();
473 if (!owner)
474 owner = memref.getParentBlock()->getParentOp();
475
476 // If the op implements the interface, query it for a materialized ownership
477 // value.
478 if (auto deallocOpInterface = dyn_cast<BufferDeallocationOpInterface>(owner))
479 return deallocOpInterface.materializeUniqueOwnershipForMemref(
480 state, options, builder, memref);
481
482 // Otherwise use the default implementation.
483 return state.getMemrefWithUniqueOwnership(builder, memref, block);
484}
485
486LogicalResult
487BufferDeallocation::verifyFunctionPreconditions(FunctionOpInterface op) {
488 // (1) Ensure that there are supported loops only (no explicit control flow
489 // loops).
490 Backedges backedges(op);
491 if (backedges.size()) {
492 op->emitError("Only structured control-flow loops are supported.");
493 return failure();
494 }
495
496 return success();
497}
498
499LogicalResult BufferDeallocation::verifyOperationPreconditions(Operation *op) {
500 // (1) The pass does not work properly when deallocations are already present.
501 // Alternatively, we could also remove all deallocations as a pre-pass.
  if (isa<DeallocOp>(op))
    return op->emitError(
        "No deallocation operations must be present when running this pass!");
505
506 // (2) Memory side effects of unregistered ops are unknown. In particular, we
507 // do not know whether an unregistered op allocates memory or not.
508 // - Ops with recursive memory effects are allowed. All nested ops in the
509 // regions of `op` will be analyzed separately.
510 // - Call ops are allowed even though they typically do not implement the
511 // MemoryEffectOpInterface. They usually do not have side effects apart
512 // from the callee, which will be analyzed separately. (This is similar to
513 // "recursive memory effects".)
514 if (!isa<MemoryEffectOpInterface>(op) &&
515 !op->hasTrait<OpTrait::HasRecursiveMemoryEffects>() &&
516 !isa<CallOpInterface>(op))
    return op->emitError(
        "ops with unknown memory side effects are not supported");
519
520 // We do not care about ops that do not operate on buffers and have no
521 // Allocate/Free side effect.
522 if (!hasBufferSemantics(op) && hasNeitherAllocateNorFreeSideEffect(op))
523 return success();
524
525 // (3) Check that the control flow structures are supported.
  auto regions = op->getRegions();
  // Check that ops with at least one attached region implement the
  // RegionBranchOpInterface. If an operation does not fulfill this condition,
  // we cannot apply the deallocation steps. As an exception, we accept ops
  // that have a single attached region and produce no results, since in that
  // case the intra-region control flow does not affect the transformation.
  size_t size = regions.size();
  if (((size == 1 && !op->getResults().empty()) || size > 1) &&
      !dyn_cast<RegionBranchOpInterface>(op)) {
    return op->emitError("All operations with attached regions need to "
                         "implement the RegionBranchOpInterface.");
  }
539
  // (4) Check that terminators with more than one successor except
  // `cf.cond_br` are not present and that either BranchOpInterface or
  // RegionBranchTerminatorOpInterface is implemented.
  if (op->hasTrait<OpTrait::NoTerminator>())
    return op->emitError("NoTerminator trait is not supported");

  if (op->hasTrait<OpTrait::IsTerminator>()) {
    // Either one of those interfaces has to be implemented on terminators, but
    // not both.
    if (!isa<BranchOpInterface, RegionBranchTerminatorOpInterface>(op) ||
        (isa<BranchOpInterface>(op) &&
         isa<RegionBranchTerminatorOpInterface>(op)))
      return op->emitError(
          "Terminators must implement either BranchOpInterface or "
          "RegionBranchTerminatorOpInterface (but not both)!");

    // We only support terminators with 0 or 1 successors for now and
    // special-case the conditional branch op.
    if (op->getSuccessors().size() > 1)
      return op->emitError("Terminators with more than one successor "
                           "are not supported!");
  }
564
565 return success();
566}
567
568LogicalResult
569BufferDeallocation::updateFunctionSignature(FunctionOpInterface op) {
570 SmallVector<TypeRange> returnOperandTypes(llvm::map_range(
571 op.getFunctionBody().getOps<RegionBranchTerminatorOpInterface>(),
572 [](RegionBranchTerminatorOpInterface op) {
573 return op.getSuccessorOperands(RegionBranchPoint::parent()).getTypes();
574 }));
  if (!llvm::all_equal(returnOperandTypes))
576 return op->emitError(
577 "there are multiple return operations with different operand types");
578
579 TypeRange resultTypes = op.getResultTypes();
580 // Check if we found a return operation because that doesn't necessarily
581 // always have to be the case, e.g., consider a function with one block that
582 // has a cf.br at the end branching to itself again (i.e., an infinite loop).
583 // In that case we don't want to crash but just not update the return types.
584 if (!returnOperandTypes.empty())
585 resultTypes = returnOperandTypes[0];
586
587 op.setFunctionTypeAttr(TypeAttr::get(FunctionType::get(
588 op->getContext(), op.getFunctionBody().front().getArgumentTypes(),
589 resultTypes)));
590
591 return success();
592}
593
594LogicalResult BufferDeallocation::deallocate(FunctionOpInterface op) {
595 // Stop and emit a proper error message if we don't support the input IR.
  if (failed(verifyFunctionPreconditions(op)))
597 return failure();
598
599 // Process the function block by block.
600 auto result = op->walk<WalkOrder::PostOrder, ForwardDominanceIterator<>>(
601 [&](Block *block) {
602 if (failed(deallocate(block)))
603 return WalkResult::interrupt();
604 return WalkResult::advance();
605 });
606 if (result.wasInterrupted())
607 return failure();
608
609 // Update the function signature if the function is private, dynamic ownership
610 // is enabled, and the function has memrefs as arguments or results.
  return updateFunctionSignature(op);
612}
613
LogicalResult BufferDeallocation::deallocate(Block *block) {
  OpBuilder builder = OpBuilder::atBlockBegin(block);

  // Compute liveness transfers of ownership to this block.
  SmallVector<Value> liveMemrefs;
  state.getLiveMemrefsIn(block, liveMemrefs);
  for (auto li : liveMemrefs) {
    // Ownership of implicitly captured memrefs from other regions is never
    // taken, but ownership of memrefs in the same region (but different block)
    // is taken.
    if (li.getParentRegion() == block->getParent()) {
      state.updateOwnership(li, state.getOwnership(li, li.getParentBlock()),
                            block);
      state.addMemrefToDeallocate(li, block);
      continue;
    }

    if (li.getParentRegion()->isProperAncestor(block->getParent())) {
      Value falseVal = buildBoolValue(builder, li.getLoc(), false);
      state.updateOwnership(li, falseVal, block);
    }
  }

  for (unsigned i = 0, e = block->getNumArguments(); i < e; ++i) {
    BlockArgument arg = block->getArgument(i);
    if (!isMemref(arg))
      continue;

    // Adhere to function boundary ABI: no ownership of function argument
    // MemRefs is taken.
    if (isa<FunctionOpInterface>(block->getParentOp()) &&
        block->isEntryBlock()) {
      Value newArg = buildBoolValue(builder, arg.getLoc(), false);
      state.updateOwnership(arg, newArg);
      state.addMemrefToDeallocate(arg, block);
      continue;
    }

    // Pass MemRef ownerships along via `i1` values.
    Value newArg = block->addArgument(builder.getI1Type(), arg.getLoc());
    state.updateOwnership(arg, newArg);
    state.addMemrefToDeallocate(arg, block);
  }

  // For each operation in the block, handle the interfaces that affect
  // aliasing and ownership of memrefs.
  for (Operation &op : llvm::make_early_inc_range(*block)) {
    FailureOr<Operation *> result = handleAllInterfaces(&op);
    if (failed(result))
      return failure();
    if (!*result)
      continue;

    populateRemainingOwnerships(*result);
  }

  // TODO: if block has no terminator, handle dealloc insertion here.
  return success();
}
673
Operation *BufferDeallocation::appendOpResults(Operation *op,
                                               ArrayRef<Type> types) {
  SmallVector<Type> newTypes(op->getResultTypes());
  newTypes.append(types.begin(), types.end());
  auto *newOp = Operation::create(op->getLoc(), op->getName(), newTypes,
                                  op->getOperands(), op->getAttrDictionary(),
                                  op->getPropertiesStorage(),
                                  op->getSuccessors(), op->getNumRegions());
  for (auto [oldRegion, newRegion] :
       llvm::zip(op->getRegions(), newOp->getRegions()))
    newRegion.takeBody(oldRegion);

  OpBuilder(op).insert(newOp);
  op->replaceAllUsesWith(newOp->getResults().take_front(op->getNumResults()));
  op->erase();

  return newOp;
}
692
693FailureOr<Operation *>
694BufferDeallocation::handleInterface(RegionBranchOpInterface op) {
  OpBuilder builder = OpBuilder::atBlockBegin(op->getBlock());
696
697 // TODO: the RegionBranchOpInterface does not provide all the necessary
698 // methods to perform this transformation without additional assumptions on
699 // the structure. In particular, that
700 // * additional values to be passed to the next region can be added to the end
701 // of the operand list, the end of the block argument list, and the end of
702 // the result value list. However, it seems to be the general guideline for
703 // operations implementing this interface to follow this structure.
704 // * and that the block arguments and result values match the forwarded
705 // operands one-to-one (i.e., that there are no other values appended to the
706 // front).
707 // These assumptions are satisfied by the `scf.if`, `scf.for`, and `scf.while`
708 // operations.
709
710 SmallVector<RegionSuccessor> regions;
711 op.getSuccessorRegions(RegionBranchPoint::parent(), regions);
712 assert(!regions.empty() && "Must have at least one successor region");
713 SmallVector<Value> entryOperands(
714 op.getEntrySuccessorOperands(regions.front()));
  unsigned numMemrefOperands = llvm::count_if(entryOperands, isMemref);
716
717 // No ownership is acquired for any MemRefs that are passed to the region from
718 // the outside.
719 Value falseVal = buildBoolValue(builder, op.getLoc(), false);
720 op->insertOperands(op->getNumOperands(),
721 SmallVector<Value>(numMemrefOperands, falseVal));
722
723 int counter = op->getNumResults();
724 unsigned numMemrefResults = llvm::count_if(op->getResults(), isMemref);
725 SmallVector<Type> ownershipResults(numMemrefResults, builder.getI1Type());
726 RegionBranchOpInterface newOp = appendOpResults(op, ownershipResults);
727
728 for (auto result : llvm::make_filter_range(newOp->getResults(), isMemref)) {
729 state.updateOwnership(result, newOp->getResult(counter++));
730 state.addMemrefToDeallocate(result, newOp->getBlock());
731 }
732
733 return newOp.getOperation();
734}
735
736Value BufferDeallocation::materializeMemrefWithGuaranteedOwnership(
737 OpBuilder &builder, Value memref, Block *block) {
  // First, make sure we at least have 'Unique' ownership already.
  std::pair<Value, Value> newMemrefAndOwnership =
      materializeUniqueOwnership(builder, memref, block);
  Value newMemref = newMemrefAndOwnership.first;
  Value condition = newMemrefAndOwnership.second;

  // Avoid inserting additional IR if ownership is already guaranteed. In
  // particular, this is already the case when we had 'Unknown' ownership
  // initially and a clone was inserted to get to 'Unique' ownership.
  if (matchPattern(condition, m_One()))
    return newMemref;
749
750 // Insert a runtime check and only clone if we still don't have ownership at
751 // runtime.
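  //
  // The inserted IR is roughly of the following shape (a sketch; SSA names and
  // the memref type are illustrative):
  //
  //   %maybe_clone = scf.if %condition -> (memref<2xi32>) {
  //     scf.yield %new_memref : memref<2xi32>
  //   } else {
  //     %clone = bufferization.clone %new_memref : memref<2xi32> to memref<2xi32>
  //     scf.yield %clone : memref<2xi32>
  //   }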
752 Value maybeClone =
753 builder
754 .create<scf::IfOp>(
755 memref.getLoc(), condition,
756 [&](OpBuilder &builder, Location loc) {
757 builder.create<scf::YieldOp>(loc, newMemref);
758 },
759 [&](OpBuilder &builder, Location loc) {
760 Value clone =
761 builder.create<bufferization::CloneOp>(loc, newMemref);
762 builder.create<scf::YieldOp>(loc, clone);
763 })
764 .getResult(0);
  Value trueVal = buildBoolValue(builder, memref.getLoc(), true);
  state.updateOwnership(maybeClone, trueVal);
  state.addMemrefToDeallocate(maybeClone, maybeClone.getParentBlock());
768 return maybeClone;
769}
770
771FailureOr<Operation *>
772BufferDeallocation::handleInterface(BranchOpInterface op) {
773 if (op->getNumSuccessors() > 1)
774 return op->emitError("BranchOpInterface operations with multiple "
775 "successors are not supported yet");
776
777 if (op->getNumSuccessors() != 1)
778 return emitError(op.getLoc(),
779 "only BranchOpInterface operations with exactly "
780 "one successor are supported yet");
781
782 if (op.getSuccessorOperands(0).getProducedOperandCount() > 0)
783 return op.emitError("produced operands are not supported");
784
785 // Collect the values to deallocate and retain and use them to create the
786 // dealloc operation.
787 Block *block = op->getBlock();
788 OpBuilder builder(op);
789 SmallVector<Value> memrefs, conditions, toRetain;
  if (failed(state.getMemrefsAndConditionsToDeallocate(
          builder, op.getLoc(), block, memrefs, conditions)))
792 return failure();
793
794 OperandRange forwardedOperands =
795 op.getSuccessorOperands(0).getForwardedOperands();
  state.getMemrefsToRetain(block, op->getSuccessor(0), forwardedOperands,
                           toRetain);
798
799 auto deallocOp = builder.create<bufferization::DeallocOp>(
800 op.getLoc(), memrefs, conditions, toRetain);
801
802 // We want to replace the current ownership of the retained values with the
803 // result values of the dealloc operation as they are always unique.
  state.resetOwnerships(deallocOp.getRetained(), block);
805 for (auto [retained, ownership] :
806 llvm::zip(deallocOp.getRetained(), deallocOp.getUpdatedConditions())) {
807 state.updateOwnership(retained, ownership, block);
808 }
809
  unsigned numAdditionalReturns = llvm::count_if(forwardedOperands, isMemref);
811 SmallVector<Value> newOperands(forwardedOperands);
812 auto additionalConditions =
813 deallocOp.getUpdatedConditions().take_front(numAdditionalReturns);
814 newOperands.append(additionalConditions.begin(), additionalConditions.end());
815 op.getSuccessorOperands(0).getMutableForwardedOperands().assign(newOperands);
816
817 return op.getOperation();
818}
819
820FailureOr<Operation *> BufferDeallocation::handleInterface(CallOpInterface op) {
821 OpBuilder builder(op);
822
823 // Lookup the function operation and check if it has private visibility. If
824 // the function is referenced by SSA value instead of a Symbol, it's assumed
825 // to be always private.
826 Operation *funcOp = op.resolveCallable(state.getSymbolTable());
827 bool isPrivate = true;
828 if (auto symbol = dyn_cast<SymbolOpInterface>(funcOp))
829 isPrivate = symbol.isPrivate() && !symbol.isDeclaration();
830
831 // If the private-function-dynamic-ownership option is enabled and we are
832 // calling a private function, we need to add an additional `i1` result for
833 // each MemRef result to dynamically pass the current ownership indicator
834 // rather than adhering to the function boundary ABI.
835 if (options.privateFuncDynamicOwnership && isPrivate) {
836 unsigned numMemrefs = llvm::count_if(op->getResults(), isMemref);
837 SmallVector<Type> ownershipTypesToAppend(numMemrefs, builder.getI1Type());
838 unsigned ownershipCounter = op->getNumResults();
839 op = appendOpResults(op, ownershipTypesToAppend);
840
841 for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
842 state.updateOwnership(result, op->getResult(ownershipCounter++));
843 state.addMemrefToDeallocate(result, result.getParentBlock());
844 }
845
846 return op.getOperation();
847 }
848
849 // According to the function boundary ABI we are guaranteed to get ownership
850 // of all MemRefs returned by the function. Thus we set ownership to constant
851 // 'true' and remember to deallocate it.
852 Value trueVal = buildBoolValue(builder, op.getLoc(), true);
853 for (auto result : llvm::make_filter_range(op->getResults(), isMemref)) {
854 state.updateOwnership(result, trueVal);
855 state.addMemrefToDeallocate(result, result.getParentBlock());
856 }
857
858 return op.getOperation();
859}
860
861FailureOr<Operation *>
862BufferDeallocation::handleInterface(MemoryEffectOpInterface op) {
863 auto *block = op->getBlock();
  OpBuilder builder = OpBuilder::atBlockBegin(block);
865
866 for (auto operand : llvm::make_filter_range(op->getOperands(), isMemref)) {
867 if (op.getEffectOnValue<MemoryEffects::Free>(operand).has_value()) {
868 // The bufferization.manual_deallocation attribute can be attached to ops
869 // with an allocation and/or deallocation side effect. It indicates that
870 // the op is under a "manual deallocation" scheme. Deallocation ops are
871 // usually forbidden in the input IR (not supported by the buffer
872 // deallocation pass). However, if they are under manual deallocation,
873 // they can be safely ignored by the buffer deallocation pass.
874 if (!op->hasAttr(BufferizationDialect::kManualDeallocation))
875 return op->emitError(
876 "memory free side-effect on MemRef value not supported!");
877
878 // Buffers that were allocated under "manual deallocation" may be
879 // manually deallocated. We insert a runtime assertion to cover certain
880 // cases of invalid IR where an automatically managed buffer allocation
881 // is manually deallocated. This is not a bulletproof check!
882 OpBuilder::InsertionGuard g(builder);
883 builder.setInsertionPoint(op);
884 Ownership ownership = state.getOwnership(operand, block);
885 if (ownership.isUnique()) {
886 Value ownershipInverted = builder.create<arith::XOrIOp>(
887 op.getLoc(), ownership.getIndicator(),
888 buildBoolValue(builder, op.getLoc(), true));
889 builder.create<cf::AssertOp>(
890 op.getLoc(), ownershipInverted,
891 "expected that the block does not have ownership");
892 }
893 }
894 }
895
896 for (auto res : llvm::make_filter_range(op->getResults(), isMemref)) {
897 auto allocEffect = op.getEffectOnValue<MemoryEffects::Allocate>(res);
898 if (allocEffect.has_value()) {
899 if (isa<SideEffects::AutomaticAllocationScopeResource>(
900 allocEffect->getResource())) {
901 // Make sure that the ownership of auto-managed allocations is set to
902 // false. This is important for operations that have at least one memref
903 // typed operand. E.g., consider an operation like `bufferization.clone`
904 // that lowers to a `memref.alloca + memref.copy` instead of a
905 // `memref.alloc`. If we wouldn't set the ownership of the result here,
906 // the default ownership population in `populateRemainingOwnerships`
907 // would assume aliasing with the MemRef operand.
908 state.resetOwnerships(res, block);
909 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
910 continue;
911 }
912
913 if (op->hasAttr(BufferizationDialect::kManualDeallocation)) {
914 // This allocation will be deallocated manually. Assign an ownership of
915 // "false", so that it will never be deallocated by the buffer
916 // deallocation pass.
917 state.resetOwnerships(res, block);
918 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), false));
919 continue;
920 }
921
922 state.updateOwnership(res, buildBoolValue(builder, op.getLoc(), true));
923 state.addMemrefToDeallocate(res, block);
924 }
925 }
926
927 return op.getOperation();
928}
929
930FailureOr<Operation *>
931BufferDeallocation::handleInterface(RegionBranchTerminatorOpInterface op) {
932 OpBuilder builder(op);
933
934 // If this is a return operation of a function that is not private or the
935 // dynamic function boundary ownership is disabled, we need to return memref
936 // values for which we have guaranteed ownership to pass on to adhere to the
937 // function boundary ABI.
938 bool funcWithoutDynamicOwnership =
      isFunctionWithoutDynamicOwnership(op->getParentOp());
940 if (funcWithoutDynamicOwnership) {
941 for (OpOperand &val : op->getOpOperands()) {
942 if (!isMemref(val.get()))
943 continue;
944
945 val.set(materializeMemrefWithGuaranteedOwnership(builder, val.get(),
946 op->getBlock()));
947 }
948 }
949
950 // TODO: getSuccessorRegions is not implemented by all operations we care
951 // about, but we would need to check how many successors there are and under
952 // which condition they are taken, etc.
953
954 MutableOperandRange operands =
955 op.getMutableSuccessorOperands(RegionBranchPoint::parent());
956
  SmallVector<Value> updatedOwnerships;
  auto result = deallocation_impl::insertDeallocOpForReturnLike(
      state, op, operands.getAsOperandRange(), updatedOwnerships);
  if (failed(result) || !*result)
    return result;

  // Add an additional operand for every MemRef for the ownership indicator.
  if (!funcWithoutDynamicOwnership) {
    SmallVector<Value> newOperands{operands.getAsOperandRange()};
    newOperands.append(updatedOwnerships.begin(), updatedOwnerships.end());
    operands.assign(newOperands);
  }
968 }
969
970 return op.getOperation();
971}
972
973bool BufferDeallocation::isFunctionWithoutDynamicOwnership(Operation *op) {
974 auto funcOp = dyn_cast<FunctionOpInterface>(op);
975 return funcOp && (!options.privateFuncDynamicOwnership ||
976 !funcOp.isPrivate() || funcOp.isExternal());
977}
978
void BufferDeallocation::populateRemainingOwnerships(Operation *op) {
  for (auto res : op->getResults()) {
    if (!isMemref(res))
      continue;
    if (!state.getOwnership(res, op->getBlock()).isUninitialized())
      continue;

    // The op does not allocate memory; otherwise, it would have been assigned
    // an ownership during `handleInterface`. Assume the result may alias with
    // any memref operand and thus combine all their ownerships.
    for (auto operand : op->getOperands()) {
      if (!isMemref(operand))
        continue;

      state.updateOwnership(
          res, state.getOwnership(operand, operand.getParentBlock()),
          op->getBlock());
    }

    // If the ownership value is still uninitialized (e.g., because the op has
    // no memref operands), assume that no ownership is taken. E.g., this is
    // the case for "memref.get_global".
    //
    // Note: This can lead to memory leaks if memory side effects are not
    // properly specified on the op.
    if (state.getOwnership(res, op->getBlock()).isUninitialized()) {
      OpBuilder builder(op);
      state.updateOwnership(res, buildBoolValue(builder, op->getLoc(), false));
    }
  }
}
1010
1011//===----------------------------------------------------------------------===//
1012// OwnershipBasedBufferDeallocationPass
1013//===----------------------------------------------------------------------===//
1014
1015namespace {
1016
1017/// The actual buffer deallocation pass that inserts and moves dealloc nodes
1018/// into the right positions. Furthermore, it inserts additional clones if
1019/// necessary. It uses the algorithm described at the top of the file.
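///
/// Typical usage (a sketch; assuming the pass is registered under its standard
/// command-line name generated from the pass definition):
///
///   mlir-opt input.mlir \
///     --ownership-based-buffer-deallocation=private-function-dynamic-ownership=true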
1020struct OwnershipBasedBufferDeallocationPass
1021 : public bufferization::impl::OwnershipBasedBufferDeallocationBase<
1022 OwnershipBasedBufferDeallocationPass> {
1023 OwnershipBasedBufferDeallocationPass() = default;
1024 OwnershipBasedBufferDeallocationPass(DeallocationOptions options)
1025 : OwnershipBasedBufferDeallocationPass() {
1026 this->privateFuncDynamicOwnership.setValue(
1027 options.privateFuncDynamicOwnership);
1028 }
1029 void runOnOperation() override {
1030 DeallocationOptions options;
1031 options.privateFuncDynamicOwnership = privateFuncDynamicOwnership;
1032
1033 auto status = getOperation()->walk([&](func::FuncOp func) {
1034 if (func.isExternal())
1035 return WalkResult::skip();
1036
1037 if (failed(deallocateBuffersOwnershipBased(func, options)))
1038 return WalkResult::interrupt();
1039
1040 return WalkResult::advance();
1041 });
1042 if (status.wasInterrupted())
1043 signalPassFailure();
1044 }
1045};
1046
1047} // namespace
1048
1049//===----------------------------------------------------------------------===//
1050// Implement bufferization API
1051//===----------------------------------------------------------------------===//
1052
1053LogicalResult
1054bufferization::deallocateBuffersOwnershipBased(FunctionOpInterface op,
1055 DeallocationOptions options) {
1056 // Gather all required allocation nodes and prepare the deallocation phase.
1057 BufferDeallocation deallocation(op, options);
1058
1059 // Place all required temporary clone and dealloc nodes.
1060 return deallocation.deallocate(op);
1061}
1062
1063//===----------------------------------------------------------------------===//
1064// OwnershipBasedBufferDeallocationPass construction
1065//===----------------------------------------------------------------------===//
1066
1067std::unique_ptr<Pass>
1068mlir::bufferization::createOwnershipBasedBufferDeallocationPass(
1069 DeallocationOptions options) {
  return std::make_unique<OwnershipBasedBufferDeallocationPass>(options);
}