//===- VectorUtils.cpp - MLIR Utilities for VectorOps ------------------===//
//
// Part of the MLIR Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements utility methods for working with the Vector dialect.
//
//===----------------------------------------------------------------------===//

#include "mlir/Dialect/Vector/Utils/VectorUtils.h"

#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/IndexingUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/IntegerSet.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/Support/LLVM.h"

#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/InterleavedRange.h"

#define DEBUG_TYPE "vector-utils"

#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")

using namespace mlir;
/// Helper function that creates a memref::DimOp or tensor::DimOp depending on
/// the type of `source`.
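///
/// Example (illustrative): for `source : memref<4x?xf32>`, `dim = 0` folds to
/// a constant index 4, whereas `dim = 1` materializes a memref.dim op.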
Value mlir::vector::createOrFoldDimOp(OpBuilder &b, Location loc, Value source,
                                      int64_t dim) {
  if (isa<UnrankedMemRefType, MemRefType>(source.getType()))
    return b.createOrFold<memref::DimOp>(loc, source, dim);
  if (isa<UnrankedTensorType, RankedTensorType>(source.getType()))
    return b.createOrFold<tensor::DimOp>(loc, source, dim);
  llvm_unreachable("Expected MemRefType or TensorType");
}

/// Given the n-D transpose pattern 'transp', return true if 'dim0' and 'dim1'
/// should be transposed with each other within the context of their 2D
/// transposition slice.
///
/// Example 1: dim0 = 0, dim1 = 2, transp = [2, 1, 0]
/// Return true: dim0 and dim1 are transposed within the context of their 2D
/// transposition slice ([1, 0]).
///
/// Example 2: dim0 = 0, dim1 = 1, transp = [2, 1, 0]
/// Return true: dim0 and dim1 are transposed within the context of their 2D
/// transposition slice ([1, 0]). Paradoxically, note how dim1 (1) is *not*
/// transposed within the full context of the transposition.
///
/// Example 3: dim0 = 0, dim1 = 1, transp = [2, 0, 1]
/// Return false: dim0 and dim1 are *not* transposed within the context of
/// their 2D transposition slice ([0, 1]). Paradoxically, note how dim0 (0)
/// and dim1 (1) are transposed within the full context of the transposition.
static bool areDimsTransposedIn2DSlice(int64_t dim0, int64_t dim1,
                                       ArrayRef<int64_t> transp) {
  // Perform a linear scan along the dimensions of the transposed pattern. If
  // dim0 is found first, dim0 and dim1 are not transposed within the context
  // of their 2D slice. Otherwise, 'dim1' is found first and they are
  // transposed.
  for (int64_t permDim : transp) {
    if (permDim == dim0)
      return false;
    if (permDim == dim1)
      return true;
  }

  llvm_unreachable("Ill-formed transpose pattern");
}

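/// Example (illustrative): for a transpose of vector<1x8x1x4xf32> with
/// permutation [0, 3, 2, 1], the dims greater than one are 1 and 3, and they
/// are swapped within their 2-D slice, so this returns the pair (1, 3).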
FailureOr<std::pair<int, int>>
mlir::vector::isTranspose2DSlice(vector::TransposeOp op) {
  VectorType srcType = op.getSourceVectorType();
  SmallVector<int64_t> srcGtOneDims;
  for (auto [index, size] : llvm::enumerate(srcType.getShape()))
    if (size > 1)
      srcGtOneDims.push_back(index);

  if (srcGtOneDims.size() != 2)
    return failure();

  // Check whether the two source vector dimensions that are greater than one
  // must be transposed with each other so that we can apply one of the 2-D
  // transpose patterns. Otherwise, these patterns are not applicable.
  if (!areDimsTransposedIn2DSlice(srcGtOneDims[0], srcGtOneDims[1],
                                  op.getPermutation()))
    return failure();

  return std::pair<int, int>(srcGtOneDims[0], srcGtOneDims[1]);
}

/// Constructs a permutation map from memref indices to vector dimensions.
///
/// The implementation uses the knowledge of the mapping of enclosing loops to
/// vector dimensions. `enclosingLoopToVectorDim` carries this information as a
/// map with:
///   - keys representing "vectorized enclosing loops";
///   - values representing the corresponding vector dimension.
/// The algorithm traverses the "vectorized enclosing loops" and, for each one,
/// extracts the at-most-one MemRef index that varies along that loop. This
/// index is guaranteed to be at most one by construction: otherwise the MemRef
/// is not vectorizable.
/// If such a varying index is found, its dimension expression is added to the
/// permutation map at the loop's vector dimension.
/// If no index varies along the loop, 0 is added to the permutation map, which
/// corresponds to a vector broadcast along that dimension.
///
/// Returns an empty AffineMap if `enclosingLoopToVectorDim` is empty,
/// signalling that no permutation map can be constructed given
/// `enclosingLoopToVectorDim`.
///
/// Examples can be found in the documentation of `makePermutationMap`, in the
/// header file.
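///
/// Example (illustrative): for a load A[%i1, %i0] enclosed in vectorized loops
/// %i0 (mapped to vector dim 0) and %i1 (mapped to vector dim 1), index 1
/// varies with %i0 and index 0 varies with %i1, yielding the permutation map
/// (d0, d1) -> (d1, d0).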
static AffineMap makePermutationMap(
    ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &enclosingLoopToVectorDim) {
  if (enclosingLoopToVectorDim.empty())
    return AffineMap();
  MLIRContext *context =
      enclosingLoopToVectorDim.begin()->getFirst()->getContext();
  SmallVector<AffineExpr> perm(enclosingLoopToVectorDim.size(),
                               getAffineConstantExpr(0, context));

  for (auto kvp : enclosingLoopToVectorDim) {
    assert(kvp.second < perm.size());
    auto invariants = affine::getInvariantAccesses(
        cast<affine::AffineForOp>(kvp.first).getInductionVar(), indices);
    unsigned numIndices = indices.size();
    unsigned countInvariantIndices = 0;
    for (unsigned dim = 0; dim < numIndices; ++dim) {
      if (!invariants.count(indices[dim])) {
        assert(perm[kvp.second] == getAffineConstantExpr(0, context) &&
               "permutationMap already has an entry along dim");
        perm[kvp.second] = getAffineDimExpr(dim, context);
      } else {
        ++countInvariantIndices;
      }
    }
    assert((countInvariantIndices == numIndices ||
            countInvariantIndices == numIndices - 1) &&
           "Vectorization prerequisite violated: at most 1 index may vary "
           "wrt a vectorized loop");
    (void)countInvariantIndices;
  }
  return AffineMap::get(indices.size(), 0, perm, context);
}

/// Implementation detail that walks up the parents and records the ones with
/// the specified type.
/// TODO: could also be implemented as a collect parents followed by a
/// filter and made available outside this file.
template <typename T>
static SetVector<Operation *> getParentsOfType(Block *block) {
  SetVector<Operation *> res;
  auto *current = block->getParentOp();
  while (current) {
    if ([[maybe_unused]] auto typedParent = dyn_cast<T>(current)) {
      assert(res.count(current) == 0 && "Already inserted");
      res.insert(current);
    }
    current = current->getParentOp();
  }
  return res;
}

/// Returns the enclosing AffineForOps, from closest to farthest.
static SetVector<Operation *> getEnclosingforOps(Block *block) {
  return getParentsOfType<affine::AffineForOp>(block);
}

AffineMap mlir::makePermutationMap(
    Block *insertPoint, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  DenseMap<Operation *, unsigned> enclosingLoopToVectorDim;
  auto enclosingLoops = getEnclosingforOps(insertPoint);
  for (auto *forInst : enclosingLoops) {
    auto it = loopToVectorDim.find(forInst);
    if (it != loopToVectorDim.end()) {
      enclosingLoopToVectorDim.insert(*it);
    }
  }
  return ::makePermutationMap(indices, enclosingLoopToVectorDim);
}

AffineMap mlir::makePermutationMap(
    Operation *op, ArrayRef<Value> indices,
    const DenseMap<Operation *, unsigned> &loopToVectorDim) {
  return makePermutationMap(op->getBlock(), indices, loopToVectorDim);
}

bool matcher::operatesOnSuperVectorsOf(Operation &op,
                                       VectorType subVectorType) {
  // First, extract the vector type and distinguish between:
  //   a. ops that *must* lower a super-vector (i.e. vector.transfer_read,
  //      vector.transfer_write); and
  //   b. ops that *may* lower a super-vector (all other ops).
  // The ops that *may* lower a super-vector only do so if the super-vector to
  // sub-vector ratio exists. The ops that *must* lower a super-vector are
  // explicitly checked for this property.
  /// TODO: there should be a single function for all ops to do this so we
  /// do not have to special case. Maybe a trait, or just a method, unclear atm.
  bool mustDivide = false;
  (void)mustDivide;
  VectorType superVectorType;
  if (auto transfer = dyn_cast<VectorTransferOpInterface>(op)) {
    superVectorType = transfer.getVectorType();
    mustDivide = true;
  } else if (op.getNumResults() == 0) {
    if (!isa<func::ReturnOp>(op)) {
      op.emitError("NYI: assuming only return operations can have 0 "
                   "results at this point");
    }
    return false;
  } else if (op.getNumResults() == 1) {
    if (auto v = dyn_cast<VectorType>(op.getResult(0).getType())) {
      superVectorType = v;
    } else {
      // Not a vector type.
      return false;
    }
  } else {
    // Not a vector.transfer and has more than 1 result, fail hard for now to
    // wake us up when something changes.
    op.emitError("NYI: operation has more than 1 result");
    return false;
  }

  // Get the ratio.
  auto ratio =
      computeShapeRatio(superVectorType.getShape(), subVectorType.getShape());
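  // E.g. (illustrative): superVectorType = vector<4x8xf32> and subVectorType =
  // vector<8xf32> yield ratio = {4, 1}, while a super-vector of vector<3xf32>
  // over a sub-vector of vector<2xf32> yields std::nullopt.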

  // Sanity check.
  assert((ratio || !mustDivide) &&
         "vector.transfer operation in which super-vector size is not an"
         " integer multiple of sub-vector size");

  // This catches cases where divisibility is not strictly required, but the
  // super-vector shape still is not an integer multiple of the sub-vector
  // shape.
  // This could be useful information if we wanted to reshape at the level of
  // the vector type (but we would have to look at the compute and distinguish
  // between parallel, reduction and possibly other cases).
  return ratio.has_value();
}

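/// Example (illustrative): reading vector<1x3x4xf32> from memref<5x3x4xf32> is
/// a contiguous slice (the trailing dims match and the only mismatching,
/// leading dim is 1), whereas reading vector<2x2x4xf32> is not (after the
/// mismatch 2 != 3, the remaining leading dim is 2, not 1).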
bool vector::isContiguousSlice(MemRefType memrefType, VectorType vectorType) {
  if (vectorType.isScalable())
    return false;

  ArrayRef<int64_t> vectorShape = vectorType.getShape();
  auto vecRank = vectorType.getRank();

  if (!memrefType.areTrailingDimsContiguous(vecRank))
    return false;

  // Extract the trailing dims of the input memref.
  auto memrefShape = memrefType.getShape().take_back(vecRank);

  // Compare the dims of `vectorType` against `memrefType` (in reverse).
  // In the most basic case, all dims will match.
  auto firstNonMatchingDim =
      std::mismatch(vectorShape.rbegin(), vectorShape.rend(),
                    memrefShape.rbegin(), memrefShape.rend());
  if (firstNonMatchingDim.first == vectorShape.rend())
    return true;

  // One non-matching dim is still fine, however the remaining leading dims of
  // `vectorType` need to be 1.
  SmallVector<int64_t> leadingDims(++firstNonMatchingDim.first,
                                   vectorShape.rend());

  return llvm::all_of(leadingDims, [](auto x) { return x == 1; });
}

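/// Example (illustrative): for vector<3x4x[4]xf32> and targetRank = 1, the
/// trailing scalable dim is kept and the returned range iterates over the 3x4
/// leading tile offsets (0, 0), (0, 1), ..., (2, 3). For vector<[4]x4xf32> and
/// targetRank = 1, the leading dim is scalable and nothing can be unrolled.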
std::optional<StaticTileOffsetRange>
vector::createUnrollIterator(VectorType vType, int64_t targetRank) {
  if (vType.getRank() <= targetRank)
    return {};
  // Attempt to unroll until targetRank or the first scalable dimension (which
  // cannot be unrolled).
  auto shapeToUnroll = vType.getShape().drop_back(targetRank);
  auto scalableDimsToUnroll = vType.getScalableDims().drop_back(targetRank);
  auto it = llvm::find(scalableDimsToUnroll, true);
  auto firstScalableDim = it - scalableDimsToUnroll.begin();
  if (firstScalableDim == 0)
    return {};
  // All scalable dimensions should be removed now.
  scalableDimsToUnroll = scalableDimsToUnroll.slice(0, firstScalableDim);
  assert(!llvm::is_contained(scalableDimsToUnroll, true) &&
         "unexpected leading scalable dimension");
  // Create an unroll iterator for leading dimensions.
  shapeToUnroll = shapeToUnroll.slice(0, firstScalableDim);
  return StaticTileOffsetRange(shapeToUnroll, /*unrollStep=*/1);
}

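/// Example (illustrative): for a transfer op on memref<?x8xf32>, this returns
/// {%dim, 8}, where %dim is a newly created `memref.dim` on dim 0 and 8 is a
/// static size attribute.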
SmallVector<OpFoldResult> vector::getMixedSizesXfer(bool hasTensorSemantics,
                                                    Operation *xfer,
                                                    RewriterBase &rewriter) {
  auto loc = xfer->getLoc();

  Value base = TypeSwitch<Operation *, Value>(xfer)
                   .Case<vector::TransferReadOp>(
                       [&](auto readOp) { return readOp.getBase(); })
                   .Case<vector::TransferWriteOp>(
                       [&](auto writeOp) { return writeOp.getOperand(1); });

  SmallVector<OpFoldResult> mixedSourceDims =
      hasTensorSemantics ? tensor::getMixedSizes(rewriter, loc, base)
                         : memref::getMixedSizes(rewriter, loc, base);
  return mixedSourceDims;
}

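/// Example (illustrative): vector<2x[4]xf32> is linearizable (rank 2, one
/// scalable dim), whereas vector<4xf32> (rank 1) and vector<[2]x[4]xf32> (two
/// scalable dims) are not.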
bool vector::isLinearizableVector(VectorType type) {
  return (type.getRank() > 1) && (type.getNumScalableDims() <= 1);
}

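/// Example (illustrative): reading a tensor<3x?xf32> with inputVectorSizes =
/// [3, 4] and useInBoundsInsteadOfMasking = false produces roughly:
///
///   %c0 = arith.constant 0 : index
///   %dim = tensor.dim %source, %c1 : tensor<3x?xf32>
///   %mask = vector.create_mask %c3, %dim : vector<3x4xi1>
///   %read = vector.mask %mask {
///     vector.transfer_read %source[%c0, %c0], %pad {in_bounds = [true, true]}
///         : tensor<3x?xf32>, vector<3x4xf32>
///   } : vector<3x4xi1> -> vector<3x4xf32>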
Value vector::createReadOrMaskedRead(OpBuilder &builder, Location loc,
                                     Value source,
                                     ArrayRef<int64_t> inputVectorSizes,
                                     Value padValue,
                                     bool useInBoundsInsteadOfMasking) {
  assert(llvm::none_of(inputVectorSizes,
                       [](int64_t s) { return s == ShapedType::kDynamic; }) &&
         "invalid input vector sizes");
  auto sourceShapedType = cast<ShapedType>(source.getType());
  auto sourceShape = sourceShapedType.getShape();
  assert(sourceShape.size() == inputVectorSizes.size() &&
         "expected same ranks.");
  auto vectorType = VectorType::get(inputVectorSizes, padValue.getType());
  assert(padValue.getType() == sourceShapedType.getElementType() &&
         "expected pad element type to match source element type");
  int64_t readRank = inputVectorSizes.size();
  auto zero = builder.create<arith::ConstantIndexOp>(loc, 0);
  SmallVector<bool> inBoundsVal(readRank, true);

  if (useInBoundsInsteadOfMasking) {
    // Update the inBounds attribute.
    // FIXME: This computation is too weak - it ignores the read indices.
    for (int64_t i = 0; i < readRank; i++)
      inBoundsVal[i] = (sourceShape[i] == inputVectorSizes[i]) &&
                       !ShapedType::isDynamic(sourceShape[i]);
  }
  auto transferReadOp = builder.create<vector::TransferReadOp>(
      loc,
      /*vectorType=*/vectorType,
      /*source=*/source,
      /*indices=*/SmallVector<Value>(readRank, zero),
      /*padding=*/padValue,
      /*inBounds=*/inBoundsVal);

  if (llvm::equal(inputVectorSizes, sourceShape) || useInBoundsInsteadOfMasking)
    return transferReadOp;
  SmallVector<OpFoldResult> mixedSourceDims =
      tensor::getMixedSizes(builder, loc, source);

  auto maskType = VectorType::get(inputVectorSizes, builder.getI1Type());
  Value mask =
      builder.create<vector::CreateMaskOp>(loc, maskType, mixedSourceDims);
  return mlir::vector::maskOperation(builder, transferReadOp, mask)
      ->getResult(0);
}

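/// Example (illustrative): shape = [8, ?] with inputVectorSizes = [8, 16] is
/// valid (the dynamic dim is masked and 8 <= 8), whereas inputVectorSizes =
/// [4, 16] is not, because the static size 8 exceeds 4.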
LogicalResult
vector::isValidMaskedInputVector(ArrayRef<int64_t> shape,
                                 ArrayRef<int64_t> inputVectorSizes) {
  LDBG("Iteration space static sizes: " << llvm::interleaved(shape));

  if (inputVectorSizes.size() != shape.size()) {
    LDBG("Input vector sizes don't match the number of loops");
    return failure();
  }
  if (ShapedType::isDynamicShape(inputVectorSizes)) {
    LDBG("Input vector sizes can't have dynamic dimensions");
    return failure();
  }
  if (!llvm::all_of(llvm::zip(shape, inputVectorSizes),
                    [](std::tuple<int64_t, int64_t> sizePair) {
                      int64_t staticSize = std::get<0>(sizePair);
                      int64_t inputSize = std::get<1>(sizePair);
                      return ShapedType::isDynamic(staticSize) ||
                             staticSize <= inputSize;
                    })) {
    LDBG("Input vector sizes must be greater than or equal to iteration space "
         "static sizes");
    return failure();
  }
  return success();
}