1 | //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements logic for three optimization passes. The first two |
10 | // passes try to move alloc nodes out of blocks to reduce the number of |
11 | // allocations and copies during buffer deallocation. The third pass tries to |
12 | // convert heap-based allocations to stack-based allocations, if possible. |
13 | |
14 | #include "mlir/Dialect/Bufferization/Transforms/Passes.h" |
15 | |
16 | #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" |
17 | #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" |
18 | #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" |
19 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
20 | #include "mlir/Dialect/MemRef/IR/MemRef.h" |
21 | #include "mlir/IR/Operation.h" |
22 | #include "mlir/Interfaces/LoopLikeInterface.h" |
23 | #include "mlir/Pass/Pass.h" |
24 | |
25 | namespace mlir { |
26 | namespace bufferization { |
27 | #define GEN_PASS_DEF_BUFFERHOISTINGPASS |
28 | #define GEN_PASS_DEF_BUFFERLOOPHOISTINGPASS |
29 | #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACKPASS |
30 | #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc" |
31 | } // namespace bufferization |
32 | } // namespace mlir |
33 | |
34 | using namespace mlir; |
35 | using namespace mlir::bufferization; |
36 | |
37 | /// Returns true if the given operation implements a known high-level region- |
38 | /// based control-flow interface. |
39 | static bool isKnownControlFlowInterface(Operation *op) { |
40 | return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); |
41 | } |
42 | |
43 | /// Returns true if the given operation represents a loop by testing whether it |
44 | /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In |
45 | /// the case of a `RegionBranchOpInterface`, it checks all region-based control- |
46 | /// flow edges for cycles. |
47 | static bool isLoop(Operation *op) { |
48 | // If the operation implements the `LoopLikeOpInterface` it can be considered |
49 | // a loop. |
50 | if (isa<LoopLikeOpInterface>(op)) |
51 | return true; |
52 | |
53 | // If the operation does not implement the `RegionBranchOpInterface`, it is |
54 | // (currently) not possible to detect a loop. |
55 | auto regionInterface = dyn_cast<RegionBranchOpInterface>(op); |
56 | if (!regionInterface) |
57 | return false; |
58 | |
59 | return regionInterface.hasLoop(); |
60 | } |
61 | |
62 | /// Return whether the given operation is a loop with sequential execution |
63 | /// semantics. |
64 | static bool isSequentialLoop(Operation *op) { |
65 | return !op->hasTrait<OpTrait::HasParallelRegion>() && isLoop(op); |
66 | } |
67 | |
68 | /// Returns true if the given operation implements the AllocationOpInterface |
69 | /// and it supports the dominate block hoisting. |
70 | static bool allowAllocDominateBlockHoisting(Operation *op) { |
71 | auto allocOp = dyn_cast<AllocationOpInterface>(op); |
72 | return allocOp && |
73 | static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block); |
74 | } |
75 | |
76 | /// Returns true if the given operation implements the AllocationOpInterface |
77 | /// and it supports the loop hoisting. |
78 | static bool allowAllocLoopHoisting(Operation *op) { |
79 | auto allocOp = dyn_cast<AllocationOpInterface>(op); |
80 | return allocOp && |
81 | static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop); |
82 | } |
83 | |
84 | /// Check if the size of the allocation is less than the given size. The |
85 | /// transformation is only applied to small buffers since large buffers could |
86 | /// exceed the stack space. |
87 | static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, |
88 | unsigned maxRankOfAllocatedMemRef) { |
89 | auto type = dyn_cast<ShapedType>(alloc.getType()); |
90 | if (!type || !alloc.getDefiningOp<memref::AllocOp>()) |
91 | return false; |
92 | if (!type.hasStaticShape()) { |
93 | // Check if the dynamic shape dimension of the alloc is produced by |
94 | // `memref.rank`. If this is the case, it is likely to be small. |
95 | // Furthermore, the dimension is limited to the maximum rank of the |
96 | // allocated memref to avoid large values by multiplying several small |
97 | // values. |
98 | if (type.getRank() <= maxRankOfAllocatedMemRef) { |
99 | return llvm::all_of(Range: alloc.getDefiningOp()->getOperands(), |
100 | P: [&](Value operand) { |
101 | return operand.getDefiningOp<memref::RankOp>(); |
102 | }); |
103 | } |
104 | return false; |
105 | } |
106 | unsigned bitwidth = mlir::DataLayout::closest(op: alloc.getDefiningOp()) |
107 | .getTypeSizeInBits(t: type.getElementType()); |
108 | return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; |
109 | } |
110 | |
111 | /// Checks whether the given aliases leave the allocation scope. |
112 | static bool |
113 | leavesAllocationScope(Region *parentRegion, |
114 | const BufferViewFlowAnalysis::ValueSetT &aliases) { |
115 | for (Value alias : aliases) { |
116 | for (auto *use : alias.getUsers()) { |
117 | // If there is at least one alias that leaves the parent region, we know |
118 | // that this alias escapes the whole region and hence the associated |
119 | // allocation leaves allocation scope. |
120 | if (isa<RegionBranchTerminatorOpInterface>(use) && |
121 | use->getParentRegion() == parentRegion) |
122 | return true; |
123 | } |
124 | } |
125 | return false; |
126 | } |
127 | |
128 | /// Checks, if an automated allocation scope for a given alloc value exists. |
129 | static bool hasAllocationScope(Value alloc, |
130 | const BufferViewFlowAnalysis &aliasAnalysis) { |
131 | Region *region = alloc.getParentRegion(); |
132 | do { |
133 | if (Operation *parentOp = region->getParentOp()) { |
134 | // Check if the operation is an automatic allocation scope and whether an |
135 | // alias leaves the scope. This means, an allocation yields out of |
136 | // this scope and can not be transformed in a stack-based allocation. |
137 | if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && |
138 | !leavesAllocationScope(parentRegion: region, aliases: aliasAnalysis.resolve(value: alloc))) |
139 | return true; |
140 | // Check if the operation is a known control flow interface and break the |
141 | // loop to avoid transformation in loops. Furthermore skip transformation |
142 | // if the operation does not implement a RegionBeanchOpInterface. |
143 | if (isLoop(op: parentOp) || !isKnownControlFlowInterface(op: parentOp)) |
144 | break; |
145 | } |
146 | } while ((region = region->getParentRegion())); |
147 | return false; |
148 | } |
149 | |
150 | namespace { |
151 | |
152 | //===----------------------------------------------------------------------===// |
153 | // BufferAllocationHoisting |
154 | //===----------------------------------------------------------------------===// |
155 | |
156 | /// A base implementation compatible with the `BufferAllocationHoisting` class. |
157 | struct BufferAllocationHoistingStateBase { |
158 | /// A pointer to the current dominance info. |
159 | DominanceInfo *dominators; |
160 | |
161 | /// The current allocation value. |
162 | Value allocValue; |
163 | |
164 | /// The current placement block (if any). |
165 | Block *placementBlock; |
166 | |
167 | /// Initializes the state base. |
168 | BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, |
169 | Block *placementBlock) |
170 | : dominators(dominators), allocValue(allocValue), |
171 | placementBlock(placementBlock) {} |
172 | }; |
173 | |
174 | /// Implements the actual hoisting logic for allocation nodes. |
175 | template <typename StateT> |
176 | class BufferAllocationHoisting : public BufferPlacementTransformationBase { |
177 | public: |
178 | BufferAllocationHoisting(Operation *op) |
179 | : BufferPlacementTransformationBase(op), dominators(op), |
180 | postDominators(op), scopeOp(op) {} |
181 | |
182 | /// Moves allocations upwards. |
183 | void hoist() { |
184 | SmallVector<Value> allocsAndAllocas; |
185 | for (BufferPlacementAllocs::AllocEntry &entry : allocs) |
186 | allocsAndAllocas.push_back(Elt: std::get<0>(t&: entry)); |
187 | scopeOp->walk([&](memref::AllocaOp op) { |
188 | allocsAndAllocas.push_back(Elt: op.getMemref()); |
189 | }); |
190 | |
191 | for (auto allocValue : allocsAndAllocas) { |
192 | if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) |
193 | continue; |
194 | Operation *definingOp = allocValue.getDefiningOp(); |
195 | assert(definingOp && "No defining op" ); |
196 | auto operands = definingOp->getOperands(); |
197 | auto resultAliases = aliases.resolve(value: allocValue); |
198 | // Determine the common dominator block of all aliases. |
199 | Block *dominatorBlock = |
200 | findCommonDominator(allocValue, resultAliases, dominators); |
201 | // Init the initial hoisting state. |
202 | StateT state(&dominators, allocValue, allocValue.getParentBlock()); |
203 | // Check for additional allocation dependencies to compute an upper bound |
204 | // for hoisting. |
205 | Block *dependencyBlock = nullptr; |
206 | // If this node has dependencies, check all dependent nodes. This ensures |
207 | // that all dependency values have been computed before allocating the |
208 | // buffer. |
209 | for (Value depValue : operands) { |
210 | Block *depBlock = depValue.getParentBlock(); |
211 | if (!dependencyBlock || dominators.dominates(a: dependencyBlock, b: depBlock)) |
212 | dependencyBlock = depBlock; |
213 | } |
214 | |
215 | // Find the actual placement block and determine the start operation using |
216 | // an upper placement-block boundary. The idea is that placement block |
217 | // cannot be moved any further upwards than the given upper bound. |
218 | Block *placementBlock = findPlacementBlock( |
219 | state, upperBound: state.computeUpperBound(dominatorBlock, dependencyBlock)); |
220 | Operation *startOperation = BufferPlacementAllocs::getStartOperation( |
221 | allocValue, placementBlock, liveness); |
222 | |
223 | // Move the alloc in front of the start operation. |
224 | Operation *allocOperation = allocValue.getDefiningOp(); |
225 | allocOperation->moveBefore(existingOp: startOperation); |
226 | } |
227 | } |
228 | |
229 | private: |
230 | /// Finds a valid placement block by walking upwards in the CFG until we |
231 | /// either cannot continue our walk due to constraints (given by the StateT |
232 | /// implementation) or we have reached the upper-most dominator block. |
233 | Block *findPlacementBlock(StateT &state, Block *upperBound) { |
234 | Block *currentBlock = state.placementBlock; |
235 | // Walk from the innermost regions/loops to the outermost regions/loops and |
236 | // find an appropriate placement block that satisfies the constraint of the |
237 | // current StateT implementation. Walk until we reach the upperBound block |
238 | // (if any). |
239 | |
240 | // If we are not able to find a valid parent operation or an associated |
241 | // parent block, break the walk loop. |
242 | Operation *parentOp; |
243 | Block *parentBlock; |
244 | while ((parentOp = currentBlock->getParentOp()) && |
245 | (parentBlock = parentOp->getBlock()) && |
246 | (!upperBound || |
247 | dominators.properlyDominates(a: upperBound, b: currentBlock))) { |
248 | // Try to find an immediate dominator and check whether the parent block |
249 | // is above the immediate dominator (if any). |
250 | DominanceInfoNode *idom = nullptr; |
251 | |
252 | // DominanceInfo doesn't support getNode queries for single-block regions. |
253 | if (!currentBlock->isEntryBlock()) |
254 | idom = dominators.getNode(a: currentBlock)->getIDom(); |
255 | |
256 | if (idom && dominators.properlyDominates(a: parentBlock, b: idom->getBlock())) { |
257 | // If the current immediate dominator is below the placement block, move |
258 | // to the immediate dominator block. |
259 | currentBlock = idom->getBlock(); |
260 | state.recordMoveToDominator(currentBlock); |
261 | } else { |
262 | // We have to move to our parent block since an immediate dominator does |
263 | // either not exist or is above our parent block. If we cannot move to |
264 | // our parent operation due to constraints given by the StateT |
265 | // implementation, break the walk loop. Furthermore, we should not move |
266 | // allocations out of unknown region-based control-flow operations. |
267 | if (!isKnownControlFlowInterface(op: parentOp) || |
268 | !state.isLegalPlacement(parentOp)) |
269 | break; |
270 | // Move to our parent block by notifying the current StateT |
271 | // implementation. |
272 | currentBlock = parentBlock; |
273 | state.recordMoveToParent(currentBlock); |
274 | } |
275 | } |
276 | // Return the finally determined placement block. |
277 | return state.placementBlock; |
278 | } |
279 | |
280 | /// The dominator info to find the appropriate start operation to move the |
281 | /// allocs. |
282 | DominanceInfo dominators; |
283 | |
284 | /// The post dominator info to move the dependent allocs in the right |
285 | /// position. |
286 | PostDominanceInfo postDominators; |
287 | |
288 | /// The map storing the final placement blocks of a given alloc value. |
289 | llvm::DenseMap<Value, Block *> placementBlocks; |
290 | |
291 | /// The operation that this transformation is working on. It is used to also |
292 | /// gather allocas. |
293 | Operation *scopeOp; |
294 | }; |
295 | |
296 | /// A state implementation compatible with the `BufferAllocationHoisting` class |
297 | /// that hoists allocations into dominator blocks while keeping them inside of |
298 | /// loops. |
299 | struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { |
300 | using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; |
301 | |
302 | /// Computes the upper bound for the placement block search. |
303 | Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { |
304 | // If we do not have a dependency block, the upper bound is given by the |
305 | // dominator block. |
306 | if (!dependencyBlock) |
307 | return dominatorBlock; |
308 | |
309 | // Find the "lower" block of the dominator and the dependency block to |
310 | // ensure that we do not move allocations above this block. |
311 | return dominators->properlyDominates(a: dominatorBlock, b: dependencyBlock) |
312 | ? dependencyBlock |
313 | : dominatorBlock; |
314 | } |
315 | |
316 | /// Returns true if the given operation does not represent a loop. |
317 | bool isLegalPlacement(Operation *op) { return !isLoop(op); } |
318 | |
319 | /// Returns true if the given operation should be considered for hoisting. |
320 | static bool shouldHoistOpType(Operation *op) { |
321 | return allowAllocDominateBlockHoisting(op); |
322 | } |
323 | |
324 | /// Sets the current placement block to the given block. |
325 | void recordMoveToDominator(Block *block) { placementBlock = block; } |
326 | |
327 | /// Sets the current placement block to the given block. |
328 | void recordMoveToParent(Block *block) { recordMoveToDominator(block); } |
329 | }; |
330 | |
331 | /// A state implementation compatible with the `BufferAllocationHoisting` class |
332 | /// that hoists allocations out of loops. |
333 | struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { |
334 | using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; |
335 | |
336 | /// Remembers the dominator block of all aliases. |
337 | Block *aliasDominatorBlock = nullptr; |
338 | |
339 | /// Computes the upper bound for the placement block search. |
340 | Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { |
341 | aliasDominatorBlock = dominatorBlock; |
342 | // If there is a dependency block, we have to use this block as an upper |
343 | // bound to satisfy all allocation value dependencies. |
344 | return dependencyBlock ? dependencyBlock : nullptr; |
345 | } |
346 | |
347 | /// Returns true if the given operation represents a loop with sequential |
348 | /// execution semantics and one of the aliases caused the |
349 | /// `aliasDominatorBlock` to be "above" the block of the given loop operation. |
350 | /// If this is the case, it indicates that the allocation is passed via a back |
351 | /// edge. |
352 | bool isLegalPlacement(Operation *op) { |
353 | return isSequentialLoop(op) && |
354 | !dominators->dominates(a: aliasDominatorBlock, b: op->getBlock()); |
355 | } |
356 | |
357 | /// Returns true if the given operation should be considered for hoisting. |
358 | static bool shouldHoistOpType(Operation *op) { |
359 | return allowAllocLoopHoisting(op); |
360 | } |
361 | |
362 | /// Does not change the internal placement block, as we want to move |
363 | /// operations out of loops only. |
364 | void recordMoveToDominator(Block *block) {} |
365 | |
366 | /// Sets the current placement block to the given block. |
367 | void recordMoveToParent(Block *block) { placementBlock = block; } |
368 | }; |
369 | |
370 | //===----------------------------------------------------------------------===// |
371 | // BufferPlacementPromotion |
372 | //===----------------------------------------------------------------------===// |
373 | |
374 | /// Promotes heap-based allocations to stack-based allocations (if possible). |
375 | class BufferPlacementPromotion : BufferPlacementTransformationBase { |
376 | public: |
377 | BufferPlacementPromotion(Operation *op) |
378 | : BufferPlacementTransformationBase(op) {} |
379 | |
380 | /// Promote buffers to stack-based allocations. |
381 | void promote(function_ref<bool(Value)> isSmallAlloc) { |
382 | for (BufferPlacementAllocs::AllocEntry &entry : allocs) { |
383 | Value alloc = std::get<0>(t&: entry); |
384 | Operation *dealloc = std::get<1>(t&: entry); |
385 | // Checking several requirements to transform an AllocOp into an AllocaOp. |
386 | // The transformation is done if the allocation is limited to a given |
387 | // size. Furthermore, a deallocation must not be defined for this |
388 | // allocation entry and a parent allocation scope must exist. |
389 | if (!isSmallAlloc(alloc) || dealloc || |
390 | !hasAllocationScope(alloc, aliasAnalysis: aliases)) |
391 | continue; |
392 | |
393 | Operation *startOperation = BufferPlacementAllocs::getStartOperation( |
394 | allocValue: alloc, placementBlock: alloc.getParentBlock(), liveness); |
395 | // Build a new alloca that is associated with its parent |
396 | // `AutomaticAllocationScope` determined during the initialization phase. |
397 | OpBuilder builder(startOperation); |
398 | Operation *allocOp = alloc.getDefiningOp(); |
399 | if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) { |
400 | std::optional<Operation *> alloca = |
401 | allocInterface.buildPromotedAlloc(builder, alloc); |
402 | if (!alloca) |
403 | continue; |
404 | // Replace the original alloc by a newly created alloca. |
405 | allocOp->replaceAllUsesWith(values&: alloca.value()); |
406 | allocOp->erase(); |
407 | } |
408 | } |
409 | } |
410 | }; |
411 | |
412 | //===----------------------------------------------------------------------===// |
413 | // BufferOptimizationPasses |
414 | //===----------------------------------------------------------------------===// |
415 | |
416 | /// The buffer hoisting pass that hoists allocation nodes into dominating |
417 | /// blocks. |
418 | struct BufferHoistingPass |
419 | : public bufferization::impl::BufferHoistingPassBase<BufferHoistingPass> { |
420 | |
421 | void runOnOperation() override { |
422 | // Hoist all allocations into dominator blocks. |
423 | BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( |
424 | getOperation()); |
425 | optimizer.hoist(); |
426 | } |
427 | }; |
428 | |
429 | /// The buffer loop hoisting pass that hoists allocation nodes out of loops. |
430 | struct BufferLoopHoistingPass |
431 | : public bufferization::impl::BufferLoopHoistingPassBase< |
432 | BufferLoopHoistingPass> { |
433 | |
434 | void runOnOperation() override { |
435 | // Hoist all allocations out of loops. |
436 | hoistBuffersFromLoops(getOperation()); |
437 | } |
438 | }; |
439 | |
440 | /// The promote buffer to stack pass that tries to convert alloc nodes into |
441 | /// alloca nodes. |
442 | class PromoteBuffersToStackPass |
443 | : public bufferization::impl::PromoteBuffersToStackPassBase< |
444 | PromoteBuffersToStackPass> { |
445 | using Base::Base; |
446 | |
447 | public: |
448 | explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) |
449 | : isSmallAlloc(std::move(isSmallAlloc)) {} |
450 | |
451 | LogicalResult initialize(MLIRContext *context) override { |
452 | if (isSmallAlloc == nullptr) { |
453 | isSmallAlloc = [=](Value alloc) { |
454 | return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, |
455 | maxRankOfAllocatedMemRef); |
456 | }; |
457 | } |
458 | return success(); |
459 | } |
460 | |
461 | void runOnOperation() override { |
462 | // Move all allocation nodes and convert candidates into allocas. |
463 | BufferPlacementPromotion optimizer(getOperation()); |
464 | optimizer.promote(isSmallAlloc); |
465 | } |
466 | |
467 | private: |
468 | std::function<bool(Value)> isSmallAlloc; |
469 | }; |
470 | |
471 | } // namespace |
472 | |
473 | void mlir::bufferization::hoistBuffersFromLoops(Operation *op) { |
474 | BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op); |
475 | optimizer.hoist(); |
476 | } |
477 | |
478 | std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( |
479 | std::function<bool(Value)> isSmallAlloc) { |
480 | return std::make_unique<PromoteBuffersToStackPass>(args: std::move(isSmallAlloc)); |
481 | } |
482 | |