1 | //===- LoopInvariantCodeMotionUtils.h - LICM Utils --------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #ifndef MLIR_TRANSFORMS_LOOPINVARIANTCODEMOTIONUTILS_H |
10 | #define MLIR_TRANSFORMS_LOOPINVARIANTCODEMOTIONUTILS_H |
11 | |
12 | #include "mlir/Support/LLVM.h" |
13 | |
14 | #include "llvm/ADT/SmallVector.h" |
15 | |
16 | namespace mlir { |
17 | |
18 | class LoopLikeOpInterface; |
19 | class Operation; |
20 | class Region; |
21 | class RewriterBase; |
22 | class Value; |
23 | |
24 | /// Given a list of regions, perform loop-invariant code motion. An operation is |
25 | /// loop-invariant if it depends only on values defined outside of the loop. |
26 | /// LICM moves these operations out of the loop body so that they are not |
27 | /// computed more than once. |
28 | /// |
29 | /// Example: |
30 | /// |
31 | /// ```mlir |
32 | /// affine.for %arg0 = 0 to 10 { |
33 | ///   affine.for %arg1 = 0 to 10 { |
34 | ///     %v0 = arith.addi %arg0, %arg0 : i32 |
35 | ///     %v1 = arith.addi %v0, %arg1 : i32 |
36 | ///   } |
37 | /// } |
38 | /// ``` |
39 | /// |
40 | /// After LICM: |
41 | /// |
42 | /// ```mlir |
43 | /// affine.for %arg0 = 0 to 10 { |
44 | ///   %v0 = arith.addi %arg0, %arg0 : i32 |
45 | ///   affine.for %arg1 = 0 to 10 { |
46 | ///     %v1 = arith.addi %v0, %arg1 : i32 |
47 | ///   } |
48 | /// } |
49 | /// ``` |
50 | /// |
51 | /// Users must supply three callbacks. |
52 | /// |
53 | /// - `isDefinedOutsideRegion` returns true if the given value is invariant with |
54 | ///   respect to the given region. A common implementation might be: |
55 | ///   `value.getParentRegion()->isProperAncestor(region)`. |
56 | /// - `shouldMoveOutOfRegion` returns true if the provided operation can be |
57 | ///   moved out of the given region, e.g. if it is side-effect free. |
58 | /// - `moveOutOfRegion` moves the operation out of the given region. A common |
59 | ///   implementation might be: `op->moveBefore(region->getParentOp())`. |
60 | /// |
61 | /// An operation is moved if all of its operands satisfy |
62 | /// `isDefinedOutsideRegion` and it satisfies `shouldMoveOutOfRegion`. |
63 | /// |
64 | /// Returns the number of operations moved. |
65 | size_t moveLoopInvariantCode( |
66 | ArrayRef<Region *> regions, |
67 | function_ref<bool(Value, Region *)> isDefinedOutsideRegion, |
68 | function_ref<bool(Operation *, Region *)> shouldMoveOutOfRegion, |
69 | function_ref<void(Operation *, Region *)> moveOutOfRegion); |
70 | |
71 | /// Move side-effect-free loop-invariant code out of a loop-like op using the |
72 | /// interface's methods. Presumably returns the number of ops moved (confirm). |
73 | size_t moveLoopInvariantCode(LoopLikeOpInterface loopLike); |
74 | |
75 | /// Hoist loop-invariant tensor subsets (subset extraction and subset insertion |
76 | /// ops) from loop-like ops. Extraction ops are moved before the loop. Insertion |
77 | /// ops are moved after the loop. The loop body operates on newly added region |
78 | /// iter_args (one per extraction-insertion pair). |
79 | /// |
80 | /// A subset extraction op (`SubsetExtractionOpInterface`) extracts from a |
81 | /// tensor value at a subset. The result of the op may have an arbitrary type, |
82 | /// i.e., not necessarily a tensor type. Example: "tensor.extract_slice". |
83 | /// |
84 | /// A subset insertion op (`SubsetInsertionOpInterface`) inserts into a tensor |
85 | /// value ("destination") at a subset. Example: "tensor.insert_slice". |
86 | /// |
87 | /// Matching extraction-insertion subset ops can be hoisted from a loop if there |
88 | /// are no other ops within the loop that operate on the same or on an |
89 | /// overlapping subset. In particular, non-subset ops can prevent hoisting |
90 | /// because the analysis does not know what subset they operate on. |
91 | /// |
92 | /// Example: |
93 | /// ``` |
94 | /// %r = scf.for ... iter_args(%t = %a) -> (tensor<?xf32>) { |
95 | ///   %0 = tensor.extract_slice %t[0][5][1] : tensor<?xf32> to tensor<5xf32> |
96 | ///   %1 = "test.foo"(%0) : (tensor<5xf32>) -> (tensor<5xf32>) |
97 | ///   %2 = tensor.insert_slice %1 into %t[0][5][1] |
98 | ///       : tensor<5xf32> into tensor<?xf32> |
99 | ///   scf.yield %2 : tensor<?xf32> |
100 | /// } |
101 | /// ``` |
102 | /// Is rewritten to: |
103 | /// ``` |
104 | /// %0 = tensor.extract_slice %a[0][5][1] : tensor<?xf32> to tensor<5xf32> |
105 | /// %new_loop:2 = scf.for ... iter_args(%t = %a, %h = %0) -> (tensor<?xf32>, tensor<5xf32>) { |
106 | ///   %1 = "test.foo"(%h) : (tensor<5xf32>) -> (tensor<5xf32>) |
107 | ///   scf.yield %t, %1 : tensor<?xf32>, tensor<5xf32> |
108 | /// } |
109 | /// %r = tensor.insert_slice %new_loop#1 into %new_loop#0[0][5][1] |
110 | ///     : tensor<5xf32> into tensor<?xf32> |
111 | /// ``` |
112 | LoopLikeOpInterface hoistLoopInvariantSubsets(RewriterBase &rewriter, |
113 | LoopLikeOpInterface loopLike); |
114 | |
115 | } // end namespace mlir |
116 | |
117 | #endif // MLIR_TRANSFORMS_LOOPINVARIANTCODEMOTIONUTILS_H |
118 | |