1 | //===- BufferOptimizations.cpp - pre-pass optimizations for bufferization -===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file implements logic for three optimization passes. The first two |
10 | // passes try to move alloc nodes out of blocks to reduce the number of |
11 | // allocations and copies during buffer deallocation. The third pass tries to |
12 | // convert heap-based allocations to stack-based allocations, if possible. |
13 | |
14 | #include "mlir/Dialect/Bufferization/Transforms/Passes.h" |
15 | |
16 | #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h" |
17 | #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h" |
18 | #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" |
19 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
20 | #include "mlir/Dialect/MemRef/IR/MemRef.h" |
21 | #include "mlir/IR/Operation.h" |
22 | #include "mlir/Interfaces/LoopLikeInterface.h" |
23 | #include "mlir/Pass/Pass.h" |
24 | |
25 | namespace mlir { |
26 | namespace bufferization { |
27 | #define GEN_PASS_DEF_BUFFERHOISTING |
28 | #define GEN_PASS_DEF_BUFFERLOOPHOISTING |
29 | #define GEN_PASS_DEF_PROMOTEBUFFERSTOSTACK |
30 | #include "mlir/Dialect/Bufferization/Transforms/Passes.h.inc" |
31 | } // namespace bufferization |
32 | } // namespace mlir |
33 | |
34 | using namespace mlir; |
35 | using namespace mlir::bufferization; |
36 | |
37 | /// Returns true if the given operation implements a known high-level region- |
38 | /// based control-flow interface. |
39 | static bool isKnownControlFlowInterface(Operation *op) { |
40 | return isa<LoopLikeOpInterface, RegionBranchOpInterface>(op); |
41 | } |
42 | |
43 | /// Returns true if the given operation represents a loop by testing whether it |
44 | /// implements the `LoopLikeOpInterface` or the `RegionBranchOpInterface`. In |
45 | /// the case of a `RegionBranchOpInterface`, it checks all region-based control- |
46 | /// flow edges for cycles. |
47 | static bool isLoop(Operation *op) { |
48 | // If the operation implements the `LoopLikeOpInterface` it can be considered |
49 | // a loop. |
50 | if (isa<LoopLikeOpInterface>(op)) |
51 | return true; |
52 | |
53 | // If the operation does not implement the `RegionBranchOpInterface`, it is |
54 | // (currently) not possible to detect a loop. |
55 | auto regionInterface = dyn_cast<RegionBranchOpInterface>(op); |
56 | if (!regionInterface) |
57 | return false; |
58 | |
59 | return regionInterface.hasLoop(); |
60 | } |
61 | |
62 | /// Returns true if the given operation implements the AllocationOpInterface |
63 | /// and it supports the dominate block hoisting. |
64 | static bool allowAllocDominateBlockHoisting(Operation *op) { |
65 | auto allocOp = dyn_cast<AllocationOpInterface>(op); |
66 | return allocOp && |
67 | static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Block); |
68 | } |
69 | |
70 | /// Returns true if the given operation implements the AllocationOpInterface |
71 | /// and it supports the loop hoisting. |
72 | static bool allowAllocLoopHoisting(Operation *op) { |
73 | auto allocOp = dyn_cast<AllocationOpInterface>(op); |
74 | return allocOp && |
75 | static_cast<uint8_t>(allocOp.getHoistingKind() & HoistingKind::Loop); |
76 | } |
77 | |
78 | /// Check if the size of the allocation is less than the given size. The |
79 | /// transformation is only applied to small buffers since large buffers could |
80 | /// exceed the stack space. |
81 | static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes, |
82 | unsigned maxRankOfAllocatedMemRef) { |
83 | auto type = dyn_cast<ShapedType>(alloc.getType()); |
84 | if (!type || !alloc.getDefiningOp<memref::AllocOp>()) |
85 | return false; |
86 | if (!type.hasStaticShape()) { |
87 | // Check if the dynamic shape dimension of the alloc is produced by |
88 | // `memref.rank`. If this is the case, it is likely to be small. |
89 | // Furthermore, the dimension is limited to the maximum rank of the |
90 | // allocated memref to avoid large values by multiplying several small |
91 | // values. |
92 | if (type.getRank() <= maxRankOfAllocatedMemRef) { |
93 | return llvm::all_of(Range: alloc.getDefiningOp()->getOperands(), |
94 | P: [&](Value operand) { |
95 | return operand.getDefiningOp<memref::RankOp>(); |
96 | }); |
97 | } |
98 | return false; |
99 | } |
100 | unsigned bitwidth = mlir::DataLayout::closest(op: alloc.getDefiningOp()) |
101 | .getTypeSizeInBits(t: type.getElementType()); |
102 | return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8; |
103 | } |
104 | |
105 | /// Checks whether the given aliases leave the allocation scope. |
106 | static bool |
107 | leavesAllocationScope(Region *parentRegion, |
108 | const BufferViewFlowAnalysis::ValueSetT &aliases) { |
109 | for (Value alias : aliases) { |
110 | for (auto *use : alias.getUsers()) { |
111 | // If there is at least one alias that leaves the parent region, we know |
112 | // that this alias escapes the whole region and hence the associated |
113 | // allocation leaves allocation scope. |
114 | if (isa<RegionBranchTerminatorOpInterface>(use) && |
115 | use->getParentRegion() == parentRegion) |
116 | return true; |
117 | } |
118 | } |
119 | return false; |
120 | } |
121 | |
122 | /// Checks, if an automated allocation scope for a given alloc value exists. |
123 | static bool hasAllocationScope(Value alloc, |
124 | const BufferViewFlowAnalysis &aliasAnalysis) { |
125 | Region *region = alloc.getParentRegion(); |
126 | do { |
127 | if (Operation *parentOp = region->getParentOp()) { |
128 | // Check if the operation is an automatic allocation scope and whether an |
129 | // alias leaves the scope. This means, an allocation yields out of |
130 | // this scope and can not be transformed in a stack-based allocation. |
131 | if (parentOp->hasTrait<OpTrait::AutomaticAllocationScope>() && |
132 | !leavesAllocationScope(parentRegion: region, aliases: aliasAnalysis.resolve(value: alloc))) |
133 | return true; |
134 | // Check if the operation is a known control flow interface and break the |
135 | // loop to avoid transformation in loops. Furthermore skip transformation |
136 | // if the operation does not implement a RegionBeanchOpInterface. |
137 | if (isLoop(op: parentOp) || !isKnownControlFlowInterface(op: parentOp)) |
138 | break; |
139 | } |
140 | } while ((region = region->getParentRegion())); |
141 | return false; |
142 | } |
143 | |
144 | namespace { |
145 | |
146 | //===----------------------------------------------------------------------===// |
147 | // BufferAllocationHoisting |
148 | //===----------------------------------------------------------------------===// |
149 | |
150 | /// A base implementation compatible with the `BufferAllocationHoisting` class. |
151 | struct BufferAllocationHoistingStateBase { |
152 | /// A pointer to the current dominance info. |
153 | DominanceInfo *dominators; |
154 | |
155 | /// The current allocation value. |
156 | Value allocValue; |
157 | |
158 | /// The current placement block (if any). |
159 | Block *placementBlock; |
160 | |
161 | /// Initializes the state base. |
162 | BufferAllocationHoistingStateBase(DominanceInfo *dominators, Value allocValue, |
163 | Block *placementBlock) |
164 | : dominators(dominators), allocValue(allocValue), |
165 | placementBlock(placementBlock) {} |
166 | }; |
167 | |
168 | /// Implements the actual hoisting logic for allocation nodes. |
169 | template <typename StateT> |
170 | class BufferAllocationHoisting : public BufferPlacementTransformationBase { |
171 | public: |
172 | BufferAllocationHoisting(Operation *op) |
173 | : BufferPlacementTransformationBase(op), dominators(op), |
174 | postDominators(op), scopeOp(op) {} |
175 | |
176 | /// Moves allocations upwards. |
177 | void hoist() { |
178 | SmallVector<Value> allocsAndAllocas; |
179 | for (BufferPlacementAllocs::AllocEntry &entry : allocs) |
180 | allocsAndAllocas.push_back(Elt: std::get<0>(t&: entry)); |
181 | scopeOp->walk([&](memref::AllocaOp op) { |
182 | allocsAndAllocas.push_back(Elt: op.getMemref()); |
183 | }); |
184 | |
185 | for (auto allocValue : allocsAndAllocas) { |
186 | if (!StateT::shouldHoistOpType(allocValue.getDefiningOp())) |
187 | continue; |
188 | Operation *definingOp = allocValue.getDefiningOp(); |
189 | assert(definingOp && "No defining op" ); |
190 | auto operands = definingOp->getOperands(); |
191 | auto resultAliases = aliases.resolve(value: allocValue); |
192 | // Determine the common dominator block of all aliases. |
193 | Block *dominatorBlock = |
194 | findCommonDominator(allocValue, resultAliases, dominators); |
195 | // Init the initial hoisting state. |
196 | StateT state(&dominators, allocValue, allocValue.getParentBlock()); |
197 | // Check for additional allocation dependencies to compute an upper bound |
198 | // for hoisting. |
199 | Block *dependencyBlock = nullptr; |
200 | // If this node has dependencies, check all dependent nodes. This ensures |
201 | // that all dependency values have been computed before allocating the |
202 | // buffer. |
203 | for (Value depValue : operands) { |
204 | Block *depBlock = depValue.getParentBlock(); |
205 | if (!dependencyBlock || dominators.dominates(a: dependencyBlock, b: depBlock)) |
206 | dependencyBlock = depBlock; |
207 | } |
208 | |
209 | // Find the actual placement block and determine the start operation using |
210 | // an upper placement-block boundary. The idea is that placement block |
211 | // cannot be moved any further upwards than the given upper bound. |
212 | Block *placementBlock = findPlacementBlock( |
213 | state, upperBound: state.computeUpperBound(dominatorBlock, dependencyBlock)); |
214 | Operation *startOperation = BufferPlacementAllocs::getStartOperation( |
215 | allocValue, placementBlock, liveness); |
216 | |
217 | // Move the alloc in front of the start operation. |
218 | Operation *allocOperation = allocValue.getDefiningOp(); |
219 | allocOperation->moveBefore(existingOp: startOperation); |
220 | } |
221 | } |
222 | |
223 | private: |
224 | /// Finds a valid placement block by walking upwards in the CFG until we |
225 | /// either cannot continue our walk due to constraints (given by the StateT |
226 | /// implementation) or we have reached the upper-most dominator block. |
227 | Block *findPlacementBlock(StateT &state, Block *upperBound) { |
228 | Block *currentBlock = state.placementBlock; |
229 | // Walk from the innermost regions/loops to the outermost regions/loops and |
230 | // find an appropriate placement block that satisfies the constraint of the |
231 | // current StateT implementation. Walk until we reach the upperBound block |
232 | // (if any). |
233 | |
234 | // If we are not able to find a valid parent operation or an associated |
235 | // parent block, break the walk loop. |
236 | Operation *parentOp; |
237 | Block *parentBlock; |
238 | while ((parentOp = currentBlock->getParentOp()) && |
239 | (parentBlock = parentOp->getBlock()) && |
240 | (!upperBound || |
241 | dominators.properlyDominates(a: upperBound, b: currentBlock))) { |
242 | // Try to find an immediate dominator and check whether the parent block |
243 | // is above the immediate dominator (if any). |
244 | DominanceInfoNode *idom = nullptr; |
245 | |
246 | // DominanceInfo doesn't support getNode queries for single-block regions. |
247 | if (!currentBlock->isEntryBlock()) |
248 | idom = dominators.getNode(a: currentBlock)->getIDom(); |
249 | |
250 | if (idom && dominators.properlyDominates(a: parentBlock, b: idom->getBlock())) { |
251 | // If the current immediate dominator is below the placement block, move |
252 | // to the immediate dominator block. |
253 | currentBlock = idom->getBlock(); |
254 | state.recordMoveToDominator(currentBlock); |
255 | } else { |
256 | // We have to move to our parent block since an immediate dominator does |
257 | // either not exist or is above our parent block. If we cannot move to |
258 | // our parent operation due to constraints given by the StateT |
259 | // implementation, break the walk loop. Furthermore, we should not move |
260 | // allocations out of unknown region-based control-flow operations. |
261 | if (!isKnownControlFlowInterface(op: parentOp) || |
262 | !state.isLegalPlacement(parentOp)) |
263 | break; |
264 | // Move to our parent block by notifying the current StateT |
265 | // implementation. |
266 | currentBlock = parentBlock; |
267 | state.recordMoveToParent(currentBlock); |
268 | } |
269 | } |
270 | // Return the finally determined placement block. |
271 | return state.placementBlock; |
272 | } |
273 | |
274 | /// The dominator info to find the appropriate start operation to move the |
275 | /// allocs. |
276 | DominanceInfo dominators; |
277 | |
278 | /// The post dominator info to move the dependent allocs in the right |
279 | /// position. |
280 | PostDominanceInfo postDominators; |
281 | |
282 | /// The map storing the final placement blocks of a given alloc value. |
283 | llvm::DenseMap<Value, Block *> placementBlocks; |
284 | |
285 | /// The operation that this transformation is working on. It is used to also |
286 | /// gather allocas. |
287 | Operation *scopeOp; |
288 | }; |
289 | |
290 | /// A state implementation compatible with the `BufferAllocationHoisting` class |
291 | /// that hoists allocations into dominator blocks while keeping them inside of |
292 | /// loops. |
293 | struct BufferAllocationHoistingState : BufferAllocationHoistingStateBase { |
294 | using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; |
295 | |
296 | /// Computes the upper bound for the placement block search. |
297 | Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { |
298 | // If we do not have a dependency block, the upper bound is given by the |
299 | // dominator block. |
300 | if (!dependencyBlock) |
301 | return dominatorBlock; |
302 | |
303 | // Find the "lower" block of the dominator and the dependency block to |
304 | // ensure that we do not move allocations above this block. |
305 | return dominators->properlyDominates(a: dominatorBlock, b: dependencyBlock) |
306 | ? dependencyBlock |
307 | : dominatorBlock; |
308 | } |
309 | |
310 | /// Returns true if the given operation does not represent a loop. |
311 | bool isLegalPlacement(Operation *op) { return !isLoop(op); } |
312 | |
313 | /// Returns true if the given operation should be considered for hoisting. |
314 | static bool shouldHoistOpType(Operation *op) { |
315 | return allowAllocDominateBlockHoisting(op); |
316 | } |
317 | |
318 | /// Sets the current placement block to the given block. |
319 | void recordMoveToDominator(Block *block) { placementBlock = block; } |
320 | |
321 | /// Sets the current placement block to the given block. |
322 | void recordMoveToParent(Block *block) { recordMoveToDominator(block); } |
323 | }; |
324 | |
325 | /// A state implementation compatible with the `BufferAllocationHoisting` class |
326 | /// that hoists allocations out of loops. |
327 | struct BufferAllocationLoopHoistingState : BufferAllocationHoistingStateBase { |
328 | using BufferAllocationHoistingStateBase::BufferAllocationHoistingStateBase; |
329 | |
330 | /// Remembers the dominator block of all aliases. |
331 | Block *aliasDominatorBlock = nullptr; |
332 | |
333 | /// Computes the upper bound for the placement block search. |
334 | Block *computeUpperBound(Block *dominatorBlock, Block *dependencyBlock) { |
335 | aliasDominatorBlock = dominatorBlock; |
336 | // If there is a dependency block, we have to use this block as an upper |
337 | // bound to satisfy all allocation value dependencies. |
338 | return dependencyBlock ? dependencyBlock : nullptr; |
339 | } |
340 | |
341 | /// Returns true if the given operation represents a loop and one of the |
342 | /// aliases caused the `aliasDominatorBlock` to be "above" the block of the |
343 | /// given loop operation. If this is the case, it indicates that the |
344 | /// allocation is passed via a back edge. |
345 | bool isLegalPlacement(Operation *op) { |
346 | return isLoop(op) && |
347 | !dominators->dominates(a: aliasDominatorBlock, b: op->getBlock()); |
348 | } |
349 | |
350 | /// Returns true if the given operation should be considered for hoisting. |
351 | static bool shouldHoistOpType(Operation *op) { |
352 | return allowAllocLoopHoisting(op); |
353 | } |
354 | |
355 | /// Does not change the internal placement block, as we want to move |
356 | /// operations out of loops only. |
357 | void recordMoveToDominator(Block *block) {} |
358 | |
359 | /// Sets the current placement block to the given block. |
360 | void recordMoveToParent(Block *block) { placementBlock = block; } |
361 | }; |
362 | |
363 | //===----------------------------------------------------------------------===// |
364 | // BufferPlacementPromotion |
365 | //===----------------------------------------------------------------------===// |
366 | |
367 | /// Promotes heap-based allocations to stack-based allocations (if possible). |
368 | class BufferPlacementPromotion : BufferPlacementTransformationBase { |
369 | public: |
370 | BufferPlacementPromotion(Operation *op) |
371 | : BufferPlacementTransformationBase(op) {} |
372 | |
373 | /// Promote buffers to stack-based allocations. |
374 | void promote(function_ref<bool(Value)> isSmallAlloc) { |
375 | for (BufferPlacementAllocs::AllocEntry &entry : allocs) { |
376 | Value alloc = std::get<0>(t&: entry); |
377 | Operation *dealloc = std::get<1>(t&: entry); |
378 | // Checking several requirements to transform an AllocOp into an AllocaOp. |
379 | // The transformation is done if the allocation is limited to a given |
380 | // size. Furthermore, a deallocation must not be defined for this |
381 | // allocation entry and a parent allocation scope must exist. |
382 | if (!isSmallAlloc(alloc) || dealloc || |
383 | !hasAllocationScope(alloc, aliasAnalysis: aliases)) |
384 | continue; |
385 | |
386 | Operation *startOperation = BufferPlacementAllocs::getStartOperation( |
387 | allocValue: alloc, placementBlock: alloc.getParentBlock(), liveness); |
388 | // Build a new alloca that is associated with its parent |
389 | // `AutomaticAllocationScope` determined during the initialization phase. |
390 | OpBuilder builder(startOperation); |
391 | Operation *allocOp = alloc.getDefiningOp(); |
392 | if (auto allocInterface = dyn_cast<AllocationOpInterface>(allocOp)) { |
393 | Operation *alloca = |
394 | allocInterface.buildPromotedAlloc(builder, alloc).value(); |
395 | if (!alloca) |
396 | continue; |
397 | // Replace the original alloc by a newly created alloca. |
398 | allocOp->replaceAllUsesWith(values&: alloca); |
399 | allocOp->erase(); |
400 | } |
401 | } |
402 | } |
403 | }; |
404 | |
405 | //===----------------------------------------------------------------------===// |
406 | // BufferOptimizationPasses |
407 | //===----------------------------------------------------------------------===// |
408 | |
409 | /// The buffer hoisting pass that hoists allocation nodes into dominating |
410 | /// blocks. |
411 | struct BufferHoistingPass |
412 | : public bufferization::impl::BufferHoistingBase<BufferHoistingPass> { |
413 | |
414 | void runOnOperation() override { |
415 | // Hoist all allocations into dominator blocks. |
416 | BufferAllocationHoisting<BufferAllocationHoistingState> optimizer( |
417 | getOperation()); |
418 | optimizer.hoist(); |
419 | } |
420 | }; |
421 | |
422 | /// The buffer loop hoisting pass that hoists allocation nodes out of loops. |
423 | struct BufferLoopHoistingPass |
424 | : public bufferization::impl::BufferLoopHoistingBase< |
425 | BufferLoopHoistingPass> { |
426 | |
427 | void runOnOperation() override { |
428 | // Hoist all allocations out of loops. |
429 | hoistBuffersFromLoops(getOperation()); |
430 | } |
431 | }; |
432 | |
433 | /// The promote buffer to stack pass that tries to convert alloc nodes into |
434 | /// alloca nodes. |
435 | class PromoteBuffersToStackPass |
436 | : public bufferization::impl::PromoteBuffersToStackBase< |
437 | PromoteBuffersToStackPass> { |
438 | public: |
439 | PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes, |
440 | unsigned maxRankOfAllocatedMemRef) { |
441 | this->maxAllocSizeInBytes = maxAllocSizeInBytes; |
442 | this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef; |
443 | } |
444 | |
445 | explicit PromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) |
446 | : isSmallAlloc(std::move(isSmallAlloc)) {} |
447 | |
448 | LogicalResult initialize(MLIRContext *context) override { |
449 | if (isSmallAlloc == nullptr) { |
450 | isSmallAlloc = [=](Value alloc) { |
451 | return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes, |
452 | maxRankOfAllocatedMemRef); |
453 | }; |
454 | } |
455 | return success(); |
456 | } |
457 | |
458 | void runOnOperation() override { |
459 | // Move all allocation nodes and convert candidates into allocas. |
460 | BufferPlacementPromotion optimizer(getOperation()); |
461 | optimizer.promote(isSmallAlloc); |
462 | } |
463 | |
464 | private: |
465 | std::function<bool(Value)> isSmallAlloc; |
466 | }; |
467 | |
468 | } // namespace |
469 | |
470 | void mlir::bufferization::hoistBuffersFromLoops(Operation *op) { |
471 | BufferAllocationHoisting<BufferAllocationLoopHoistingState> optimizer(op); |
472 | optimizer.hoist(); |
473 | } |
474 | |
475 | std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() { |
476 | return std::make_unique<BufferHoistingPass>(); |
477 | } |
478 | |
479 | std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() { |
480 | return std::make_unique<BufferLoopHoistingPass>(); |
481 | } |
482 | |
483 | std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( |
484 | unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) { |
485 | return std::make_unique<PromoteBuffersToStackPass>(args&: maxAllocSizeInBytes, |
486 | args&: maxRankOfAllocatedMemRef); |
487 | } |
488 | |
489 | std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass( |
490 | std::function<bool(Value)> isSmallAlloc) { |
491 | return std::make_unique<PromoteBuffersToStackPass>(args: std::move(isSmallAlloc)); |
492 | } |
493 | |