1//===- LoopTiling.cpp --- Loop tiling pass ------------------------------*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements a pass to tile loop nests.
10//
11//===----------------------------------------------------------------------===//
12
13#include "mlir/Dialect/Affine/Passes.h"
14
15#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
16#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
17#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
18#include "mlir/Dialect/Affine/Analysis/Utils.h"
19#include "mlir/Dialect/Affine/IR/AffineOps.h"
20#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
21#include "mlir/Dialect/Affine/LoopUtils.h"
22#include "mlir/Dialect/Affine/Utils.h"
23#include "mlir/Dialect/Func/IR/FuncOps.h"
24#include "mlir/IR/Builders.h"
25#include "mlir/IR/IRMapping.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/Debug.h"
28#include <optional>
29
30namespace mlir {
31namespace affine {
32#define GEN_PASS_DEF_AFFINELOOPTILING
33#include "mlir/Dialect/Affine/Passes.h.inc"
34} // namespace affine
35} // namespace mlir
36
37using namespace mlir;
38using namespace mlir::affine;
39
40#define DEBUG_TYPE "affine-loop-tile"
41
42namespace {
43
44/// A pass to perform loop tiling on all suitable loop nests of a Function.
45struct LoopTiling : public affine::impl::AffineLoopTilingBase<LoopTiling> {
46 LoopTiling() = default;
47 explicit LoopTiling(uint64_t cacheSizeBytes, bool avoidMaxMinBounds = true)
48 : avoidMaxMinBounds(avoidMaxMinBounds) {
49 this->cacheSizeInKiB = cacheSizeBytes / 1024;
50 }
51
52 void runOnOperation() override;
53 void getTileSizes(ArrayRef<AffineForOp> band,
54 SmallVectorImpl<unsigned> *tileSizes);
55
56 // Default tile size if nothing is provided.
57 constexpr static unsigned kDefaultTileSize = 4;
58
59 // If true, tile sizes are set to avoid max/min in bounds if possible.
60 bool avoidMaxMinBounds = true;
61};
62
63} // namespace
64
65/// Creates a pass to perform loop tiling on all suitable loop nests of a
66/// Function.
67std::unique_ptr<OperationPass<func::FuncOp>>
68mlir::affine::createLoopTilingPass(uint64_t cacheSizeBytes) {
69 return std::make_unique<LoopTiling>(args&: cacheSizeBytes);
70}
71std::unique_ptr<OperationPass<func::FuncOp>>
72mlir::affine::createLoopTilingPass() {
73 return std::make_unique<LoopTiling>();
74}
75
76/// Reduces each tile size to the largest divisor of the corresponding trip
77/// count (if the trip count is known).
78static void adjustToDivisorsOfTripCounts(ArrayRef<AffineForOp> band,
79 SmallVectorImpl<unsigned> *tileSizes) {
80 assert(band.size() == tileSizes->size() && "invalid tile size count");
81 for (unsigned i = 0, e = band.size(); i < e; i++) {
82 unsigned &tSizeAdjusted = (*tileSizes)[i];
83 std::optional<uint64_t> mayConst = getConstantTripCount(band[i]);
84 if (!mayConst)
85 continue;
86 // Adjust the tile size to largest factor of the trip count less than
87 // tSize.
88 uint64_t constTripCount = *mayConst;
89 if (constTripCount > 1 && tSizeAdjusted > constTripCount / 2)
90 tSizeAdjusted = constTripCount / 2;
91 while (constTripCount % tSizeAdjusted != 0)
92 tSizeAdjusted--;
93 }
94}
95
96// Returns tile sizes to use. Checks CL options; if none are specified, sets it
97// based on a simple model that looks at the memory footprint and determines
98// tile sizes assuming identity accesses / 1:1 tile size proportional footprint
99// along each of the dimensions being tiled.
100// TODO: evolve this model. Tile size determination is a large area
101// to play with in general.
102void LoopTiling::getTileSizes(ArrayRef<AffineForOp> band,
103 SmallVectorImpl<unsigned> *tileSizes) {
104 if (band.empty())
105 return;
106
107 // Use command-line tileSize for all loops if specified.
108 if (tileSize) {
109 tileSizes->assign(band.size(), tileSize);
110 return;
111 }
112
113 // Use supplied tile sizes and fill them with default tile size if it's short.
114 if (!this->tileSizes.empty()) {
115 tileSizes->assign(this->tileSizes.begin(), this->tileSizes.end());
116 tileSizes->resize(N: band.size(), NV: kDefaultTileSize);
117 return;
118 }
119 tileSizes->resize(N: band.size());
120
121 // If the cache size is zero, set the minimum valid tile size. No good reason
122 // to pick another specific size over this.
123 if (cacheSizeInKiB == 0) {
124 std::fill(first: tileSizes->begin(), last: tileSizes->end(), value: 1);
125 return;
126 }
127
128 // The first loop in the band.
129 AffineForOp rootForOp = band[0];
130 (void)rootForOp;
131
132 // Obtain memory footprint and set tile sizes so that a tile fits in
133 // the cache size. This is an approximation with the assumption that the
134 // footprint increases with the tile size linearly in that dimension (i.e.,
135 // assumes one-to-one access function).
136 std::optional<int64_t> fp = getMemoryFootprintBytes(band[0], 0);
137 if (!fp) {
138 // Fill with default tile sizes if footprint is unknown.
139 std::fill(first: tileSizes->begin(), last: tileSizes->end(),
140 value: LoopTiling::kDefaultTileSize);
141 if (avoidMaxMinBounds)
142 adjustToDivisorsOfTripCounts(band, tileSizes);
143 LLVM_DEBUG(
144 rootForOp.emitWarning("memory footprint unknown: using default tile "
145 "sizes adjusted to trip count divisors"));
146 return;
147 }
148
149 // Check how many times larger the cache size is when compared to footprint.
150 uint64_t cacheSizeBytes = cacheSizeInKiB * 1024;
151 uint64_t excessFactor = llvm::divideCeil(Numerator: *fp, Denominator: cacheSizeBytes);
152 if (excessFactor <= 1) {
153 // No need of any tiling - set tile size to 1.
154 std::fill(first: tileSizes->begin(), last: tileSizes->end(), value: 1);
155 return;
156 }
157
158 // Divide all loops equally in an attempt to reduce footprint.
159 // TODO: this is approximate. Ideally, obtain reuse factor /
160 // profitability along each dimension and weight tile sizes based on that as
161 // one possible approach. Or compute a polynomial in tile sizes and solve for
162 // it.
163
164 // For an n-d tileable band, compute the n^th root of the excess.
165 unsigned tSize =
166 static_cast<unsigned>(floorl(x: std::pow(x: excessFactor, y: 1.0 / band.size())));
167 // We'll keep a running product to determine the last tile size better.
168 unsigned cumulProductOfTileSizes = 1;
169 for (unsigned i = 0, e = band.size(); i < e; i++) {
170 if (i < e - 1)
171 (*tileSizes)[i] = tSize;
172 else
173 // Set last tile size to cover the balance.
174 (*tileSizes)[i] = std::max(
175 a: 1U, b: static_cast<unsigned>(excessFactor / cumulProductOfTileSizes));
176 cumulProductOfTileSizes *= (*tileSizes)[i];
177 }
178 if (avoidMaxMinBounds)
179 adjustToDivisorsOfTripCounts(band, tileSizes);
180}
181
182void LoopTiling::runOnOperation() {
183 // Bands of loops to tile.
184 std::vector<SmallVector<AffineForOp, 6>> bands;
185 getTileableBands(getOperation(), &bands);
186
187 // Tile each band.
188 for (auto &band : bands) {
189 if (!isTilingValid(band)) {
190 band.front().emitRemark("tiling nest is invalid due to dependences");
191 continue;
192 }
193
194 // Set up tile sizes; fill missing tile sizes at the end with default tile
195 // size or tileSize if one was provided.
196 SmallVector<unsigned, 6> tileSizes;
197 getTileSizes(band, &tileSizes);
198 if (llvm::DebugFlag) {
199 auto diag = band[0].emitRemark("using tile sizes [");
200 for (unsigned tSize : tileSizes)
201 diag << tSize << ' ';
202 diag << "]\n";
203 }
204 SmallVector<AffineForOp, 6> tiledNest;
205 if (failed(Result: tilePerfectlyNested(band, tileSizes, &tiledNest))) {
206 // An empty band always succeeds.
207 assert(!band.empty() && "guaranteed to succeed on empty bands");
208 LLVM_DEBUG(band.front()->emitRemark("loop tiling failed!\n"));
209 continue;
210 }
211
212 // Separate full and partial tiles.
213 if (separate) {
214 auto intraTileLoops =
215 MutableArrayRef<AffineForOp>(tiledNest).drop_front(N: band.size());
216 if (failed(separateFullTiles(intraTileLoops))) {
217 assert(!intraTileLoops.empty() &&
218 "guaranteed to succeed on empty bands");
219 LLVM_DEBUG(intraTileLoops.front()->emitRemark(
220 "separation post tiling failed!\n"));
221 }
222 }
223 }
224}
225
226constexpr unsigned LoopTiling::kDefaultTileSize;
227

Provided by KDAB

Privacy Policy
Learn to use CMake with our Intro Training
Find out more

source code of mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp