TensorOps.cpp source code [mlir/lib/Dialect/Tensor/IR/TensorOps.cpp]

1	//===----------------------------------------------------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8
9	#include "mlir/Dialect/Affine/IR/AffineOps.h"
10	#include "mlir/Dialect/Arith/IR/Arith.h"
11	#include "mlir/Dialect/Arith/Utils/Utils.h"
12	#include "mlir/Dialect/Complex/IR/Complex.h"
13	#include "mlir/Dialect/Linalg/IR/RelayoutOpInterface.h"
14	#include "mlir/Dialect/Tensor/IR/Tensor.h"
15	#include "mlir/Dialect/Utils/IndexingUtils.h"
16	#include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
17	#include "mlir/Dialect/Utils/StaticValueUtils.h"
18	#include "mlir/IR/Builders.h"
19	#include "mlir/IR/BuiltinAttributeInterfaces.h"
20	#include "mlir/IR/BuiltinTypeInterfaces.h"
21	#include "mlir/IR/BuiltinTypes.h"
22	#include "mlir/IR/IRMapping.h"
23	#include "mlir/IR/Matchers.h"
24	#include "mlir/IR/OpDefinition.h"
25	#include "mlir/IR/PatternMatch.h"
26	#include "mlir/IR/TypeUtilities.h"
27	#include "mlir/Interfaces/DestinationStyleOpInterface.h"
28	#include "mlir/Interfaces/InferIntRangeInterface.h"
29	#include "mlir/Interfaces/LoopLikeInterface.h"
30	#include "mlir/Interfaces/Utils/InferIntRangeCommon.h"
31	#include "mlir/Interfaces/ViewLikeInterface.h"
32	#include "mlir/Support/LLVM.h"
33	#include "llvm/ADT/DenseSet.h"
34	#include "llvm/ADT/STLExtras.h"
35	#include "llvm/ADT/SmallBitVector.h"
36	#include "llvm/ADT/StringRef.h"
37	#include "llvm/Support/Casting.h"
38	#include "llvm/Support/LogicalResult.h"
39	#include "llvm/Support/MathExtras.h"
40	#include <algorithm>
41	#include <optional>
42	#include <vector>
43
44	using namespace mlir;
45	using namespace mlir::tensor;
46
47	using llvm::divideCeilSigned;
48	using llvm::divideFloorSigned;
49	using llvm::mod;
50
51	/// Materialize a single constant operation from a given attribute value with
52	/// the desired resultant type.
53	Operation *TensorDialect::materializeConstant(OpBuilder &builder,
54	Attribute value, Type type,
55	Location loc) {
56	if (auto op = arith::ConstantOp::materialize(builder, value, type, loc))
57	return op;
58	if (complex::ConstantOp::isBuildableWith(value, type))
59	return builder.create<complex::ConstantOp>(loc, type,
60	llvm::cast<ArrayAttr>(value));
61	return nullptr;
62	}
63
64	OpFoldResult tensor::getMixedSize(OpBuilder &builder, Location loc, Value value,
65	int64_t dim) {
66	auto tensorType = llvm::cast<RankedTensorType>(value.getType());
67	if (tensorType.isDynamicDim(dim))
68	return builder.createOrFold<tensor::DimOp>(loc, value, dim);
69
70	return builder.getIndexAttr(value: tensorType.getDimSize(dim));
71	}
72
73	SmallVector<OpFoldResult> tensor::getMixedSizes(OpBuilder &builder,
74	Location loc, Value value) {
75	auto tensorType = llvm::cast<RankedTensorType>(value.getType());
76	SmallVector<OpFoldResult> result;
77	for (int64_t i = `0`; i < tensorType.getRank(); ++i)
78	result.push_back(Elt: getMixedSize(builder, loc, value, dim: i));
79	return result;
80	}
81
82	FailureOr<Value> tensor::getOrCreateDestination(OpBuilder &b, Location loc,
83	OpResult opResult) {
84	auto tensorType = llvm::dyn_cast<TensorType>(Val: opResult.getType());
85	assert(tensorType && "expected tensor type");
86
87	// If the op has a destination, it implements DestinationStyleOpInterface and
88	// we can query the destination operand from that interface.
89	auto destOp = opResult.getDefiningOp<DestinationStyleOpInterface>();
90	if (destOp)
91	return destOp.getTiedOpOperand(opResult)->get();
92
93	// Otherwise, create a new destination tensor with the same shape.
94	OpBuilder::InsertionGuard g(b);
95	b.setInsertionPoint(opResult.getDefiningOp());
96
97	// Compute sizes.
98	SmallVector<OpFoldResult> mixedSizes;
99	if (!tensorType.hasStaticShape()) {
100	// Dynamic shape: Query ReifyRankedShapedTypeOpInterface.
101	ReifiedRankedShapedTypeDims reifiedShapes;
102	if (failed(Result: reifyResultShapes(b, op: opResult.getDefiningOp(), reifiedReturnShapes&: reifiedShapes)))
103	return failure();
104	mixedSizes = reifiedShapes [opResult.getResultNumber()];
105	} else {
106	// Static shape: Take static sizes directly.
107	for (int64_t sz : tensorType.getShape())
108	mixedSizes.push_back(b.getIndexAttr(sz));
109	}
110
111	// Create empty tensor.
112	Value emptyTensor =
113	b.create<tensor::EmptyOp>(loc, mixedSizes, tensorType.getElementType());
114	return emptyTensor;
115	}
116
117	LogicalResult tensor::getOrCreateDestinations(OpBuilder &b, Location loc,
118	Operation *op,
119	SmallVector<Value> &result) {
120	for (OpResult opResult : op->getResults()) {
121	if (llvm::isa<TensorType>(Val: opResult.getType())) {
122	FailureOr<Value> destination = getOrCreateDestination(b, loc, opResult);
123	if (failed(Result: destination))
124	return failure();
125	result.push_back(Elt: *destination);
126	}
127	}
128	return success();
129	}
130
131	bool tensor::isSameTypeWithoutEncoding(Type tp1, Type tp2) {
132	if (auto rtp1 = llvm::dyn_cast<RankedTensorType>(tp1)) {
133	if (auto rtp2 = llvm::dyn_cast<RankedTensorType>(tp2))
134	return rtp1.getShape() == rtp2.getShape() &&
135	rtp1.getElementType() == rtp2.getElementType();
136	return false;
137	}
138	return tp1 == tp2; // default implementation
139	}
140
141	/// Compute the dropped dimensions of a rank-reducing tensor.extract_slice op or
142	/// rank-extending tensor.insert_slice op.
143	static llvm::SmallBitVector getDroppedDims(ArrayRef<int64_t> reducedShape,
144	ArrayRef<OpFoldResult> mixedSizes) {
145	llvm::SmallBitVector droppedDims(mixedSizes.size());
146	int64_t shapePos = reducedShape.size() - `1`;
147
148	for (const auto &size : enumerate(First: llvm::reverse(C&: mixedSizes))) {
149	size_t idx = mixedSizes.size() - size.index() - `1`;
150	// Rank-reduced dims must have a static unit dimension.
151	bool isStaticUnitSize =
152	isa<Attribute>(Val: size.value()) &&
153	llvm::cast<IntegerAttr>(cast<Attribute>(Val: size.value())).getInt() == `1`;
154
155	if (shapePos < `0`) {
156	// There are no more dims in the reduced shape. All remaining sizes must
157	// be rank-reduced dims.
158	assert(isStaticUnitSize && "expected unit dim");
159	droppedDims.set(idx);
160	continue;
161	}
162
163	// Dim is preserved if the size is not a static 1.
164	if (!isStaticUnitSize) {
165	--shapePos;
166	continue;
167	}
168
169	// Dim is preserved if the reduced shape dim is also 1.
170	if (reducedShape [shapePos] == `1`) {
171	--shapePos;
172	continue;
173	}
174
175	// Otherwise: Dim is dropped.
176	droppedDims.set(idx);
177	}
178
179	assert(shapePos < `0` && "dimension mismatch");
180	return droppedDims;
181	}
182
183	/// Given a ranked tensor type and a range of values that defines its dynamic
184	/// dimension sizes, turn all dynamic sizes that have a constant value into
185	/// static dimension sizes.
186	static RankedTensorType
187	foldDynamicToStaticDimSizes(RankedTensorType type, ValueRange dynamicSizes,
188	SmallVector<Value> &foldedDynamicSizes) {
189	SmallVector<int64_t> staticShape(type.getShape());
190	assert(type.getNumDynamicDims() == dynamicSizes.size() &&
191	"incorrect number of dynamic sizes");
192
193	// Compute new static and dynamic sizes.
194	unsigned ctr = `0`;
195	for (int64_t i = `0`, e = type.getRank(); i < e; ++i) {
196	if (type.isDynamicDim(i)) {
197	Value dynamicSize = dynamicSizes [ctr++];
198	std::optional<int64_t> cst = getConstantIntValue(ofr: dynamicSize);
199	if (cst.has_value()) {
200	// Dynamic size must be non-negative.
201	if (cst.value() < `0`) {
202	foldedDynamicSizes.push_back(Elt: dynamicSize);
203	continue;
204	}
205	staticShape [i] = *cst;
206	} else {
207	foldedDynamicSizes.push_back(Elt: dynamicSize);
208	}
209	}
210	}
211
212	return RankedTensorType::get(staticShape, type.getElementType(),
213	type.getEncoding());
214	}
215
216	//===----------------------------------------------------------------------===//
217	// BitcastOp
218	//===----------------------------------------------------------------------===//
219
220	bool BitcastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
221	if (inputs.size() != `1` \|\| outputs.size() != `1`)
222	return false;
223	Type a = inputs.front(), b = outputs.front();
224	auto aT = dyn_cast<TensorType>(a);
225	auto bT = dyn_cast<TensorType>(b);
226	if (!aT \|\| !bT)
227	return false;
228
229	if (aT.getElementTypeBitWidth() != bT.getElementTypeBitWidth())
230	return false;
231
232	return succeeded(verifyCompatibleShape(aT, bT));
233	}
234
235	namespace {
236
237	/// Replaces chains of two tensor.bitcast operations by a single tensor.bitcast
238	/// operation.
239	struct ChainedTensorBitcast : public OpRewritePattern<BitcastOp> {
240	using OpRewritePattern<BitcastOp>::OpRewritePattern;
241
242	LogicalResult matchAndRewrite(BitcastOp tensorBitcast,
243	PatternRewriter &rewriter) const final {
244	auto tensorBitcastOperand =
245	tensorBitcast.getOperand().getDefiningOp<BitcastOp>();
246	if (!tensorBitcastOperand)
247	return failure();
248
249	auto resultType = cast<TensorType>(tensorBitcast.getType());
250	rewriter.replaceOpWithNewOp<BitcastOp>(tensorBitcast, resultType,
251	tensorBitcastOperand.getOperand());
252	return success();
253	}
254	};
255
256	} // namespace
257
258	void BitcastOp::getCanonicalizationPatterns(RewritePatternSet &results,
259	MLIRContext *context) {
260	results.add<ChainedTensorBitcast>(context);
261	}
262
263	//===----------------------------------------------------------------------===//
264	// CastOp
265	//===----------------------------------------------------------------------===//
266
267	void CastOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
268	setNameFn(getResult(), "cast");
269	}
270
271	/// Returns true if `target` is a ranked tensor type that preserves static
272	/// information available in the `source` ranked tensor type.
273	bool mlir::tensor::preservesStaticInformation(Type source, Type target) {
274	auto sourceType = llvm::dyn_cast<RankedTensorType>(source);
275	auto targetType = llvm::dyn_cast<RankedTensorType>(target);
276
277	// Requires RankedTensorType.
278	if (!sourceType \|\| !targetType)
279	return false;
280
281	// Requires same elemental type.
282	if (sourceType.getElementType() != targetType.getElementType())
283	return false;
284
285	// Requires same rank.
286	if (sourceType.getRank() != targetType.getRank())
287	return false;
288
289	// Requires same encoding.
290	if (sourceType.getEncoding() != targetType.getEncoding())
291	return false;
292
293	// If cast is towards more static sizes along any dimension, don't fold.
294	for (auto t : llvm::zip(sourceType.getShape(), targetType.getShape())) {
295	if (!ShapedType::isDynamic(std::get<`0`>(t)) &&
296	ShapedType::isDynamic(std::get<`1`>(t)))
297	return false;
298	}
299
300	return true;
301	}
302
303	/// Determines whether tensor::CastOp casts to a more dynamic version of the
304	/// source tensor. This is useful to fold a tensor.cast into a consuming op and
305	/// implement canonicalization patterns for ops in different dialects that may
306	/// consume the results of tensor.cast operations. Such foldable tensor.cast
307	/// operations are typically inserted as `slice` ops and are canonicalized,
308	/// to preserve the type compatibility of their uses.
309	///
310	/// Returns true when all conditions are met:
311	/// 1. source and result are ranked tensors with same element type and rank.
312	/// 2. the tensor type has more static information than the result
313	///
314	/// Example:
315	/// ```mlir
316	/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
317	/// %2 = consumer %1 ... : tensor<?x?xf32> ...
318	/// ```
319	///
320	/// folds into:
321	///
322	/// ```mlir
323	/// %2 = consumer %0 ... : tensor<8x16xf32> ...
324	/// ```
325	bool mlir::tensor::canFoldIntoConsumerOp(CastOp castOp) {
326	if (!castOp)
327	return false;
328
329	// Can fold if the source of cast has at least as much static information as
330	// its results.
331	return preservesStaticInformation(castOp.getType(),
332	castOp.getSource().getType());
333	}
334
335	/// Determines whether the tensor::CastOp casts to a more static version of the
336	/// source tensor. This is useful to fold into a producing op and implement
337	/// canonicalization patterns with the `tensor.cast` op as the root, but
338	/// producer being from different dialects. Returns true when all conditions are
339	/// met:
340	/// 1. source and result and ranked tensors with same element type and rank.
341	/// 2. the result type has more static information than the source.
342	///
343	/// Example:
344	/// ```mlir
345	/// %1 = producer ... : tensor<?x?xf32>
346	/// %2 = tensor.cast %1 : tensor<?x?xf32> to tensor<8x16xf32>
347	/// ```
348	///
349	/// can be canonicalized to :
350	///
351	/// ```mlir
352	/// %2 = producer ... : tensor<8x16xf32>
353	/// ```
354	/// Not all ops might be canonicalizable this way, but for those that can be,
355	/// this method provides a check that it is worth doing the canonicalization.
356	bool mlir::tensor::canFoldIntoProducerOp(CastOp castOp) {
357	if (!castOp)
358	return false;
359	return preservesStaticInformation(castOp.getSource().getType(),
360	castOp.getType());
361	}
362
363	bool mlir::tensor::hasFoldableTensorCastOperand(Operation *op) {
364	return llvm::any_of(Range: op->getOpOperands(), P: [&](OpOperand &opOperand) {
365	if (llvm::isa<BlockArgument>(Val: opOperand.get()))
366	return false;
367	auto castOp = opOperand.get().getDefiningOp<tensor::CastOp>();
368	return castOp && canFoldIntoConsumerOp(castOp);
369	});
370	}
371
372	SmallVector<Value> mlir::tensor::getUpdatedOperandsAfterCastOpFolding(
373	DestinationStyleOpInterface op, SmallVector<Type> &newResTy) {
374	SmallVector<Value> newOperands;
375	newOperands.reserve(N: op->getNumOperands());
376
377	assert(hasFoldableTensorCastOperand(op) && "No foldable CastOp operands!");
378
379	// Assumes that the result has dpsInits followed by nonDpsInits.
380	int64_t dpsInitIdx = `0`;
381	for (OpOperand &opOperand : op->getOpOperands()) {
382	auto tensorCastOp = opOperand.get().getDefiningOp<tensor::CastOp>();
383	bool fold = canFoldIntoConsumerOp(tensorCastOp);
384	newOperands.push_back(fold ? tensorCastOp.getOperand() : opOperand.get());
385	if (op.isDpsInit(&opOperand) &&
386	!llvm::isa<MemRefType>(newOperands.back().getType()))
387	newResTy[dpsInitIdx++] = newOperands.back().getType();
388	}
389	return newOperands;
390	}
391
392	/// Performs folding of any operand of `op` if it comes from a tensor::CastOp
393	/// that can be folded.
394	LogicalResult mlir::tensor::foldTensorCast(Operation *op) {
395	bool folded = false;
396	for (OpOperand &operand : op->getOpOperands()) {
397	auto castOp = operand.get().getDefiningOp<tensor::CastOp>();
398	if (castOp && tensor::canFoldIntoConsumerOp(castOp)) {
399	operand.set(castOp.getOperand());
400	folded = true;
401	}
402	}
403	return success(IsSuccess: folded);
404	}
405
406	bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
407	if (inputs.size() != `1` \|\| outputs.size() != `1`)
408	return false;
409	Type a = inputs.front(), b = outputs.front();
410	auto aT = llvm::dyn_cast<TensorType>(a);
411	auto bT = llvm::dyn_cast<TensorType>(b);
412	if (!aT \|\| !bT)
413	return false;
414
415	if (aT.getElementType() != bT.getElementType())
416	return false;
417
418	return succeeded(verifyCompatibleShape(aT, bT));
419	}
420
421	/// Compute a TensorType that has the joined shape knowledge of the two
422	/// given TensorTypes. The element types need to match.
423	static TensorType joinShapes(TensorType one, TensorType two) {
424	assert(one.getElementType() == two.getElementType());
425
426	if (!one.hasRank())
427	return two;
428	if (!two.hasRank())
429	return one;
430
431	int64_t rank = one.getRank();
432	if (rank != two.getRank())
433	return {};
434
435	SmallVector<int64_t, `4`> join;
436	join.reserve(N: rank);
437	for (int64_t i = `0`; i < rank; ++i) {
438	if (one.isDynamicDim(i)) {
439	join.push_back(Elt: two.getDimSize(i));
440	continue;
441	}
442	if (two.isDynamicDim(i)) {
443	join.push_back(Elt: one.getDimSize(i));
444	continue;
445	}
446	if (one.getDimSize(i) != two.getDimSize(i))
447	return {};
448	join.push_back(Elt: one.getDimSize(i));
449	}
450	return RankedTensorType::get(join, one.getElementType());
451	}
452
453	namespace {
454
455	/// Replaces chains of two tensor.cast operations by a single tensor.cast
456	/// operation if doing so does not remove runtime constraints.
457	struct ChainedTensorCast : public OpRewritePattern<CastOp> {
458	using OpRewritePattern<CastOp>::OpRewritePattern;
459
460	LogicalResult matchAndRewrite(CastOp tensorCast,
461	PatternRewriter &rewriter) const final {
462	auto tensorCastOperand = tensorCast.getOperand().getDefiningOp<CastOp>();
463
464	if (!tensorCastOperand)
465	return failure();
466
467	auto sourceType =
468	llvm::cast<TensorType>(tensorCastOperand.getOperand().getType());
469	auto intermediateType = llvm::cast<TensorType>(tensorCastOperand.getType());
470	auto resultType = llvm::cast<TensorType>(tensorCast.getType());
471
472	// We can remove the intermediate cast if joining all three produces the
473	// same result as just joining the source and result shapes.
474	auto firstJoin =
475	joinShapes(joinShapes(sourceType, intermediateType), resultType);
476
477	// The join might not exist if the cast sequence would fail at runtime.
478	if (!firstJoin)
479	return failure();
480
481	// The newJoin always exists if the above join exists, it might just contain
482	// less information. If so, we cannot drop the intermediate cast, as doing
483	// so would remove runtime checks.
484	auto newJoin = joinShapes(sourceType, resultType);
485	if (firstJoin != newJoin)
486	return failure();
487
488	rewriter.replaceOpWithNewOp<CastOp>(tensorCast, resultType,
489	tensorCastOperand.getOperand());
490	return success();
491	}
492	};
493
494	/// Fold tensor.cast into tesor.extract_slice producer.
495	/// Example:
496	/// ```
497	/// %0 = tensor.extract_slice %arg0[%o, 0] [%s, 512] [1, 1] :
498	/// tensor<128x512xf32> to tensor<?x512xf32>
499	/// %1 = tensor.cast %0 : tensor<?x512xf32> to tensor<16x512xf32>
500	/// ```
501	/// ->
502	/// ```
503	/// %1 = tensor.extract_slice %arg0[%o, 0] [16, 512] [1, 1] :
504	/// tensor<128x512xf32> to tensor<16x512xf32>
505	/// ```
506	struct TensorCastExtractSlice : public OpRewritePattern<CastOp> {
507	using OpRewritePattern<CastOp>::OpRewritePattern;
508
509	LogicalResult matchAndRewrite(CastOp tensorCast,
510	PatternRewriter &rewriter) const final {
511	auto extractOperand =
512	tensorCast.getOperand().getDefiningOp<ExtractSliceOp>();
513
514	// Cannot fold cast to unranked tensor.
515	auto rankedResultType =
516	llvm::dyn_cast<RankedTensorType>(tensorCast.getType());
517	if (!rankedResultType)
518	return failure();
519
520	if (!extractOperand \|\| !canFoldIntoProducerOp(tensorCast) \|\|
521	rankedResultType.getShape() ==
522	llvm::cast<RankedTensorType>(tensorCast.getSource().getType())
523	.getShape())
524	return failure();
525
526	SmallVector<OpFoldResult, `4`> sizes = extractOperand.getMixedSizes();
527	auto dimMask = computeRankReductionMask(
528	extractOperand.getStaticSizes(), extractOperand.getType().getShape());
529	size_t dimIndex = `0`;
530	for (size_t i = `0`, e = sizes.size(); i < e; i++) {
531	if (dimMask && dimMask->count(i))
532	continue;
533	int64_t dim = rankedResultType.getShape()[dimIndex++];
534	if (ShapedType::isDynamic(dim))
535	continue;
536	sizes [i] = rewriter.getIndexAttr(dim);
537	}
538
539	rewriter.replaceOpWithNewOp<ExtractSliceOp>(
540	tensorCast, rankedResultType, extractOperand.getSource(),
541	extractOperand.getMixedOffsets(), sizes,
542	extractOperand.getMixedStrides());
543	return success();
544	}
545	};
546
547	} // namespace
548
549	void CastOp::getCanonicalizationPatterns(RewritePatternSet &results,
550	MLIRContext *context) {
551	results.add<ChainedTensorCast, TensorCastExtractSlice>(context);
552	}
553
554	//===----------------------------------------------------------------------===//
555	// ConcatOp
556	//===----------------------------------------------------------------------===//
557
558	RankedTensorType ConcatOp::inferResultType(int64_t dim, TypeRange inputTypes) {
559	assert(!inputTypes.empty() && "cannot concatenate 0 tensors");
560	auto tensorTypes =
561	llvm::to_vector<`4`>(llvm::map_range(inputTypes, [](Type type) {
562	return llvm::cast<RankedTensorType>(type);
563	}));
564	int64_t concatRank = tensorTypes[`0`].getRank();
565
566	// The concatenation dim must be in the range [0, rank).
567	assert(dim >= `0` && dim < concatRank && "Invalid concatenation dim");
568
569	SmallVector<int64_t> sizes(concatRank);
570	for (int64_t i = `0`, e = concatRank; i < e; ++i) {
571	if (i == dim)
572	continue;
573	SaturatedInteger size;
574	for (auto tensorType : tensorTypes)
575	size = *size.desaturate(SaturatedInteger::wrap(tensorType.getDimSize(i)));
576	sizes[i] = size.asInteger();
577	}
578	auto concatSize = SaturatedInteger::wrap(`0`);
579	for (auto tensorType : tensorTypes)
580	concatSize =
581	concatSize + SaturatedInteger::wrap(tensorType.getDimSize(dim));
582	sizes[dim] = concatSize.asInteger();
583	return RankedTensorType::get(sizes, tensorTypes[`0`].getElementType());
584	}
585
586	void ConcatOp::build(OpBuilder &builder, OperationState &result, int64_t dim,
587	ValueRange inputs) {
588	FailureOr<RankedTensorType> resultType =
589	inferResultType(dim, inputs.getTypes());
590	assert(succeeded(resultType) && "failed to infer concatenation result type");
591	build(builder, result, *resultType, dim, inputs);
592	}
593
594	LogicalResult ConcatOp::verify() {
595	if (getInputs().size() < `1`)
596	return emitOpError("requires at least one input");
597
598	SmallVector<RankedTensorType> inputTypes;
599	for (auto input : getInputs())
600	inputTypes.push_back(cast<RankedTensorType>(input.getType()));
601
602	RankedTensorType resultType = getResultType();
603	int64_t resultRank = getRank();
604	if (llvm::any_of(inputTypes, [resultRank](RankedTensorType type) {
605	return type.getRank() != resultRank;
606	}))
607	return emitOpError("rank of concatenated inputs must match result rank");
608
609	Type resultElementType = resultType.getElementType();
610	if (llvm::any_of(inputTypes, [&](RankedTensorType type) {
611	return type.getElementType() != resultElementType;
612	}))
613	return emitOpError("inputs and result element type must match");
614
615	int64_t dim = getDim();
616	if (dim >= resultRank)
617	return emitOpError("concatenation dim must be less than the tensor rank");
618
619	SmallVector<int64_t> sizes(resultRank);
620	for (int64_t i = `0`, e = resultRank; i < e; ++i) {
621	if (i == dim)
622	continue;
623	SaturatedInteger size;
624	for (auto tensorType : inputTypes) {
625	FailureOr<SaturatedInteger> maybeSize =
626	size.desaturate(SaturatedInteger::wrap(tensorType.getDimSize(i)));
627	if (failed(maybeSize))
628	return emitOpError("static concatenation size mismatch along ")
629	<< "non-concatenated dimension " << i;
630	size = *maybeSize;
631	}
632	sizes[i] = size.asInteger();
633	}
634	auto concatSize = SaturatedInteger::wrap(`0`);
635	for (auto tensorType : inputTypes)
636	concatSize =
637	concatSize + SaturatedInteger::wrap(tensorType.getDimSize(dim));
638	sizes[dim] = concatSize.asInteger();
639	auto inferredResultType =
640	RankedTensorType::get(sizes, inputTypes[`0`].getElementType());
641
642	for (auto [inferredSize, actualSize] :
643	llvm::zip_equal(inferredResultType.getShape(), resultType.getShape())) {
644	bool hasDynamic = ShapedType::isDynamic(inferredSize) \|\|
645	ShapedType::isDynamic(actualSize);
646	if (!hasDynamic && inferredSize != actualSize)
647	return emitOpError("result type ")
648	<< resultType << "does not match inferred shape "
649	<< inferredResultType << " static sizes";
650	}
651
652	return success();
653	}
654
655	FailureOr<SmallVector<Value>> ConcatOp::decomposeOperation(OpBuilder &builder) {
656	size_t numInputs = getInputs().size();
657	uint64_t concatDim = getDim();
658
659	SmallVector<SmallVector<OpFoldResult>> inputShapes;
660	inputShapes.reserve(numInputs);
661	SmallVector<OpFoldResult> concatOffsets;
662	concatOffsets.reserve(numInputs);
663	SmallVector<OpFoldResult> outputShape;
664
665	AffineExpr addExpr =
666	builder.getAffineSymbolExpr(`0`) + builder.getAffineSymbolExpr(`1`);
667	OpFoldResult zero = builder.getIndexAttr(`0`);
668	Location loc = getLoc();
669	for (auto [index, input] : llvm::enumerate(getInputs())) {
670	SmallVector<OpFoldResult> inputShape =
671	tensor::getMixedSizes(builder, input.getLoc(), input);
672	if (index == `0`) {
673	outputShape = inputShape;
674	concatOffsets.push_back(zero);
675	} else {
676	concatOffsets.push_back(outputShape[concatDim]);
677	outputShape[concatDim] = affine::makeComposedFoldedAffineApply(
678	builder, loc, addExpr,
679	{outputShape[concatDim], inputShape[concatDim]});
680	}
681	inputShapes.emplace_back(std::move(inputShape));
682	}
683
684	Value replacement = builder.create<tensor::EmptyOp>(
685	loc, outputShape, getType().getElementType());
686
687	int64_t rank = getType().getRank();
688	OpFoldResult one = builder.getIndexAttr(`1`);
689	SmallVector<OpFoldResult> strides(rank, one);
690	SmallVector<OpFoldResult> offsets(rank, zero);
691	for (auto [index, input] : llvm::enumerate(getInputs())) {
692	offsets[concatDim] = concatOffsets[index];
693	auto insertSlice = builder.create<tensor::InsertSliceOp>(
694	loc, input, replacement, offsets, inputShapes[index], strides);
695	replacement = insertSlice.getResult();
696	}
697	if (replacement.getType() != getType()) {
698	replacement = builder.create<tensor::CastOp>(loc, getType(), replacement);
699	}
700	return SmallVector<Value>{replacement};
701	}
702
703	LogicalResult
704	ConcatOp::reifyResultShapes(OpBuilder &builder,
705	ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
706	ValueRange inputs = getInputs();
707	int64_t dim = getDim();
708	RankedTensorType inferredResultType = inferResultType(dim, inputs.getTypes());
709
710	Value init = inputs[`0`];
711	int64_t rank = getType().getRank();
712
713	reifiedReturnShapes.resize(`1`, SmallVector<OpFoldResult>(rank));
714
715	// Pre-populate the result sizes with as much static information as possible
716	// from the given result type, as well as the inferred result type, otherwise
717	// use the dim sizes from the first input.
718	for (int64_t i = `0`; i < rank; ++i) {
719	if (i == dim)
720	continue;
721	if (!getType().isDynamicDim(i)) {
722	reifiedReturnShapes[`0`][i] = builder.getIndexAttr(getType().getDimSize(i));
723	} else if (!inferredResultType.isDynamicDim(i)) {
724	reifiedReturnShapes[`0`][i] = getValueOrCreateConstantIndexOp(
725	builder, getLoc(),
726	builder.getIndexAttr(inferredResultType.getDimSize(i)));
727	} else {
728	reifiedReturnShapes[`0`][i] =
729	builder.create<tensor::DimOp>(init.getLoc(), init, i).getResult();
730	}
731	}
732
733	if (getType().isDynamicDim(dim)) {
734	// Take the sum of the input sizes along the concatenated dim.
735	AffineExpr sum = builder.getAffineDimExpr(`0`);
736	SmallVector<OpFoldResult> sizes = {
737	builder.createOrFold<tensor::DimOp>(init.getLoc(), init, dim)};
738	for (auto [idx, input] : llvm::enumerate(inputs.drop_front())) {
739	sum = sum + builder.getAffineDimExpr(idx + `1`);
740	sizes.push_back(
741	builder.createOrFold<tensor::DimOp>(input.getLoc(), input, dim));
742	}
743	reifiedReturnShapes[`0`][dim] = getValueOrCreateConstantIndexOp(
744	builder, getLoc(),
745	affine::makeComposedFoldedAffineApply(builder, getLoc(), sum, sizes));
746	} else {
747	// If the result shape is static along the concatenated dim, use the static
748	// shape.
749	reifiedReturnShapes[`0`][dim] =
750	builder.getIndexAttr(getType().getDimSize(dim));
751	}
752	return success();
753	}
754
755	void ConcatOp::getAsmResultNames(
756	function_ref<void(Value, StringRef)> setNameFn) {
757	setNameFn(getResult(), "concat");
758	}
759
760	OpFoldResult ConcatOp::fold(FoldAdaptor) {
761	ValueRange inputs = getInputs();
762	if (inputs.size() == `1` && inputs[`0`].getType() == getResultType())
763	return inputs[`0`];
764	return {};
765	}
766
767	namespace {
768	/// Fold a concat op with a single input to a cast.
769	struct SingleInputConcatOp : public OpRewritePattern<ConcatOp> {
770	using OpRewritePattern<ConcatOp>::OpRewritePattern;
771
772	LogicalResult matchAndRewrite(ConcatOp concatOp,
773	PatternRewriter &rewriter) const override {
774	if (concatOp.getInputs().size() != `1`)
775	return failure();
776	rewriter.replaceOpWithNewOp<CastOp>(concatOp, concatOp.getResultType(),
777	concatOp.getInputs()[`0`]);
778	return success();
779	}
780	};
781
782	/// Propagate static shapes into the operands of a `tensor.concat`.
783	///
784	/// `tensor.concat` requires every operand to match on all dimensions except the
785	/// concatenation dimension. If one operand is already static in those
786	/// dimensions, the other operands may safely be refined to that same static
787	/// shape.
788	///
789	/// Example:
790	///
791	/// ```mlir
792	/// %2 = tensor.concat dim(0) %0, %1: (tensor<?x12xi32>, tensor<?x?xi32>) ->
793	/// tensor<?x12xi32>
794	/// ```
795	/// ->
796	/// ```mlir
797	/// %cast = tensor.cast %1 : tensor<?x?xi32> to tensor<?x12xi32>
798	/// %2 = tensor.concat dim(0) %0, %cast :
799	/// (tensor<?x12xi32>, tensor<?x12xi32>) -> tensor<?x12xi32>
800	/// ```
801	struct InferConcatOperandTypes : public OpRewritePattern<ConcatOp> {
802	using OpRewritePattern<ConcatOp>::OpRewritePattern;
803
804	LogicalResult matchAndRewrite(ConcatOp concatOp,
805	PatternRewriter &rewriter) const override {
806	int64_t dim = concatOp.getDim();
807	RankedTensorType inferredResultType =
808	ConcatOp::inferResultType(dim, concatOp->getOperandTypes());
809
810	// Find operands for which a more static shape can be inferred.
811	LogicalResult matched = failure();
812	// Inferred operand shapes are identical in every dimension except the
813	// concatenation dimension.
814	SmallVector<int64_t> inferredOperandShape(inferredResultType.getShape());
815	for (auto [operandIdx, operandType] :
816	llvm::enumerate(concatOp->getOperandTypes())) {
817	// Compute inferred type for operand.
818	inferredOperandShape[dim] =
819	cast<RankedTensorType>(operandType).getDimSize(dim);
820	auto inferredOperandType = RankedTensorType::get(
821	inferredOperandShape, inferredResultType.getElementType());
822
823	// Check if inferred type is more static.
824	if (!preservesStaticInformation(inferredOperandType, operandType)) {
825	matched = success();
826
827	// Use refined operand type and create cast from original operand.
828	auto castOp =
829	rewriter.create<CastOp>(concatOp->getLoc(), inferredOperandType,
830	concatOp.getOperand(operandIdx));
831	rewriter.modifyOpInPlace(concatOp, [=, operandIdx = operandIdx] {
832	concatOp->setOperand(operandIdx, castOp->getResult(`0`));
833	});
834	}
835	}
836
837	return matched;
838	}
839	};
840
841	// Ensure `tensor.concat`'s result type is at least as static as can be inferred
842	// from its operand types.
843	///
844	/// Example:
845	/// ```mlir
846	/// %2 = tensor.concat dim(0) %0, %1: (tensor<?x12xi32>, tensor<?x12xi32>) ->
847	/// tensor<?x?xi32>
848	/// ```
849	/// ->
850	/// ```mlir
851	/// %2 = tensor.concat dim(0) %0, %cast : (tensor<?x12xi32>, tensor<?x12xi32>)
852	/// -> tensor<?x12xi32> %cast = tensor.cast %2 : tensor<?x12xi32> to
853	/// tensor<?x?xi32>
854	/// ```
855	struct InferConcatResultType : public OpRewritePattern<ConcatOp> {
856	using OpRewritePattern<ConcatOp>::OpRewritePattern;
857
858	LogicalResult matchAndRewrite(ConcatOp concatOp,
859	PatternRewriter &rewriter) const override {
860	int64_t dim = concatOp.getDim();
861	RankedTensorType inferredResultType =
862	ConcatOp::inferResultType(dim, concatOp->getOperandTypes());
863
864	// The result type should be at least as static as inferred result type.
865	if (preservesStaticInformation(inferredResultType,
866	concatOp.getResultType())) {
867	return failure();
868	}
869
870	auto newConcatOp = rewriter.create<ConcatOp>(
871	concatOp->getLoc(), inferredResultType, dim, concatOp->getOperands());
872	rewriter.replaceOpWithNewOp<CastOp>(concatOp, concatOp.getResultType(),
873	newConcatOp);
874
875	return success();
876	}
877	};
878	} // namespace
879
880	void ConcatOp::getCanonicalizationPatterns(RewritePatternSet &results,
881	MLIRContext *context) {
882	results
883	.add<SingleInputConcatOp, InferConcatOperandTypes, InferConcatResultType>(
884	context);
885	}
886
887	//===----------------------------------------------------------------------===//
888	// DimOp
889	//===----------------------------------------------------------------------===//
890
891	void DimOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
892	setNameFn(getResult(), "dim");
893	}
894
895	void DimOp::build(OpBuilder &builder, OperationState &result, Value source,
896	int64_t index) {
897	auto loc = result.location;
898	Value indexValue = builder.create<arith::ConstantIndexOp>(loc, index);
899	build(builder, result, source, indexValue);
900	}
901
902	std::optional<int64_t> DimOp::getConstantIndex() {
903	return getConstantIntValue(getIndex());
904	}
905
906	Speculation::Speculatability DimOp::getSpeculatability() {
907	auto constantIndex = getConstantIndex();
908	if (!constantIndex)
909	return Speculation::NotSpeculatable;
910
911	auto rankedSourceType = dyn_cast<RankedTensorType>(getSource().getType());
912	if (!rankedSourceType)
913	return Speculation::NotSpeculatable;
914
915	if (rankedSourceType.getRank() <= constantIndex)
916	return Speculation::NotSpeculatable;
917
918	return Speculation::Speculatable;
919	}
920
921	void DimOp::inferResultRangesFromOptional(ArrayRef<IntegerValueRange> argRanges,
922	SetIntLatticeFn setResultRange) {
923	setResultRange(getResult(),
924	intrange::inferShapedDimOpInterface(*this, argRanges[`1`]));
925	}
926
927	OpFoldResult DimOp::fold(FoldAdaptor adaptor) {
928	// All forms of folding require a known index.
929	auto index = llvm::dyn_cast_if_present<IntegerAttr>(adaptor.getIndex());
930	if (!index)
931	return {};
932
933	// Folding for unranked types (UnrankedTensorType) is not supported.
934	auto tensorType = llvm::dyn_cast<RankedTensorType>(getSource().getType());
935	if (!tensorType)
936	return {};
937
938	// Out of bound indices produce undefined behavior but are still valid IR.
939	// Don't choke on them.
940	int64_t indexVal = index.getInt();
941	if (indexVal < `0` \|\| indexVal >= tensorType.getRank())
942	return {};
943
944	// Fold if the shape extent along the given index is known.
945	if (!tensorType.isDynamicDim(index.getInt())) {
946	Builder builder(getContext());
947	return builder.getIndexAttr(tensorType.getShape()[index.getInt()]);
948	}
949
950	Operation *definingOp = getSource().getDefiningOp();
951
952	// Fold dim to the operand of tensor.generate.
953	if (auto fromElements = dyn_cast_or_null<tensor::GenerateOp>(definingOp)) {
954	auto resultType =
955	llvm::cast<RankedTensorType>(fromElements.getResult().getType());
956	// The case where the type encodes the size of the dimension is handled
957	// above.
958	assert(ShapedType::isDynamic(resultType.getShape()[index.getInt()]));
959
960	// Find the operand of the fromElements that corresponds to this index.
961	auto dynExtents = fromElements.getDynamicExtents().begin();
962	for (auto dim : resultType.getShape().take_front(index.getInt()))
963	if (ShapedType::isDynamic(dim))
964	dynExtents++;
965
966	return Value{*dynExtents};
967	}
968
969	// The size at the given index is now known to be a dynamic size.
970	unsigned unsignedIndex = index.getValue().getZExtValue();
971
972	if (auto sliceOp = dyn_cast_or_null<tensor::ExtractSliceOp>(definingOp)) {
973	// Fold only for non-rank reduced ops. For the rank-reduced version, rely on
974	// `resolve-shaped-type-result-dims` pass.
975	if (sliceOp.getType().getRank() == sliceOp.getSourceType().getRank() &&
976	sliceOp.isDynamicSize(unsignedIndex)) {
977	return {sliceOp.getDynamicSize(unsignedIndex)};
978	}
979	}
980
981	// dim(cast) -> dim
982	if (succeeded(foldTensorCast(*this)))
983	return getResult();
984
985	return {};
986	}
987
988	namespace {
989	/// Fold dim of a cast into the dim of the source of the tensor cast.
990	struct DimOfCastOp : public OpRewritePattern<DimOp> {
991	using OpRewritePattern<DimOp>::OpRewritePattern;
992
993	LogicalResult matchAndRewrite(DimOp dimOp,
994	PatternRewriter &rewriter) const override {
995	auto castOp = dimOp.getSource().getDefiningOp<CastOp>();
996	if (!castOp)
997	return failure();
998	Value newSource = castOp.getOperand();
999	rewriter.replaceOpWithNewOp<DimOp>(dimOp, newSource, dimOp.getIndex());
1000	return success();
1001	}
1002	};
1003
1004	/// Fold dim of a destination passing style op into the dim of the corresponding
1005	/// init.
1006	struct DimOfDestStyleOp : public OpRewritePattern<DimOp> {
1007	using OpRewritePattern<DimOp>::OpRewritePattern;
1008
1009	LogicalResult matchAndRewrite(DimOp dimOp,
1010	PatternRewriter &rewriter) const override {
1011	auto source = dimOp.getSource();
1012	auto destOp = source.getDefiningOp<DestinationStyleOpInterface>();
1013	if (!destOp)
1014	return failure();
1015
1016	auto resultIndex = cast<OpResult>(source).getResultNumber();
1017	auto *initOperand = destOp.getDpsInitOperand(resultIndex);
1018
1019	rewriter.modifyOpInPlace(
1020	dimOp, [&]() { dimOp.getSourceMutable().assign(initOperand->get()); });
1021	return success();
1022	}
1023	};
1024
1025	/// Fold dim of a tensor reshape operation to a extract into the reshape's shape
1026	/// operand.
1027	struct DimOfReshapeOp : public OpRewritePattern<DimOp> {
1028	using OpRewritePattern<DimOp>::OpRewritePattern;
1029
1030	LogicalResult matchAndRewrite(DimOp dim,
1031	PatternRewriter &rewriter) const override {
1032	auto reshape = dim.getSource().getDefiningOp<ReshapeOp>();
1033
1034	if (!reshape)
1035	return failure();
1036
1037	// Since tensors are immutable we don't need to worry about where to place
1038	// the extract call
1039	rewriter.setInsertionPointAfter(dim);
1040	Location loc = dim.getLoc();
1041	Value extract =
1042	rewriter.create<ExtractOp>(loc, reshape.getShape(), dim.getIndex());
1043	if (extract.getType() != dim.getType())
1044	extract =
1045	rewriter.create<arith::IndexCastOp>(loc, dim.getType(), extract);
1046	rewriter.replaceOp(dim, extract);
1047	return success();
1048	}
1049	};
1050	} // namespace
1051
1052	void DimOp::getCanonicalizationPatterns(RewritePatternSet &results,
1053	MLIRContext *context) {
1054	results.add<DimOfCastOp, DimOfDestStyleOp, DimOfReshapeOp>(context);
1055	}
1056
1057	//===----------------------------------------------------------------------===//
1058	// EmptyOp
1059	//===----------------------------------------------------------------------===//
1060
1061	void EmptyOp::build(OpBuilder &builder, OperationState &result,
1062	ArrayRef<int64_t> staticShape, Type elementType,
1063	Attribute encoding) {
1064	assert(none_of(staticShape, ShapedType::isDynamic) &&
1065	"expected only static sizes");
1066	build(builder, result, staticShape, elementType, ValueRange{}, encoding);
1067	}
1068
1069	void EmptyOp::build(OpBuilder &builder, OperationState &result,
1070	ArrayRef<int64_t> staticShape, Type elementType,
1071	ValueRange dynamicSizes, Attribute encoding) {
1072	auto tensorType = RankedTensorType::get(staticShape, elementType, encoding);
1073	build(builder, result, tensorType, dynamicSizes);
1074	}
1075
1076	void EmptyOp::build(OpBuilder &builder, OperationState &result,
1077	ArrayRef<OpFoldResult> sizes, Type elementType,
1078	Attribute encoding) {
1079	SmallVector<int64_t> staticShape;
1080	SmallVector<Value> dynamicSizes;
1081	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape);
1082	build(builder, result, staticShape, elementType, dynamicSizes, encoding);
1083	}
1084
1085	LogicalResult EmptyOp::verify() {
1086	if (getType().getNumDynamicDims() != getDynamicSizes().size())
1087	return emitOpError("incorrect number of dynamic sizes, has ")
1088	<< getDynamicSizes().size() << ", expected "
1089	<< getType().getNumDynamicDims();
1090	return success();
1091	}
1092
1093	LogicalResult
1094	EmptyOp::reifyResultShapes(OpBuilder &builder,
1095	ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
1096	reifiedReturnShapes.resize(`1`, SmallVector<OpFoldResult>(getType().getRank()));
1097	unsigned ctr = `0`;
1098	for (int64_t i = `0`; i < getType().getRank(); ++i) {
1099	if (getType().isDynamicDim(i)) {
1100	reifiedReturnShapes[`0`][i] = getDynamicSizes()[ctr++];
1101	} else {
1102	reifiedReturnShapes[`0`][i] = builder.getIndexAttr(getType().getDimSize(i));
1103	}
1104	}
1105	return success();
1106	}
1107
1108	Value EmptyOp::getDynamicSize(unsigned idx) {
1109	assert(getType().isDynamicDim(idx) && "expected dynamic dim");
1110	unsigned ctr = `0`;
1111	for (int64_t i = `0`; i < static_cast<int64_t>(idx); ++i)
1112	if (getType().isDynamicDim(i))
1113	++ctr;
1114	return getDynamicSizes()[ctr];
1115	}
1116
1117	SmallVector<OpFoldResult> EmptyOp::getMixedSizes() {
1118	SmallVector<OpFoldResult> result;
1119	unsigned ctr = `0`;
1120	OpBuilder b(getContext());
1121	for (int64_t i = `0`; i < getType().getRank(); ++i) {
1122	if (getType().isDynamicDim(i)) {
1123	result.push_back(getDynamicSizes()[ctr++]);
1124	} else {
1125	result.push_back(b.getIndexAttr(getType().getShape()[i]));
1126	}
1127	}
1128	return result;
1129	}
1130
1131	namespace {
1132	/// Change the type of the result of a `tensor.empty` by making the result
1133	/// type statically sized along dimensions that in the original operation were
1134	/// defined as dynamic, but the size was defined using a `constant` op. For
1135	/// example
1136	///
1137	/// %c5 = arith.constant 5: index
1138	/// %0 = tensor.empty(%arg0, %c5) : tensor<?x?xf32>
1139	///
1140	/// to
1141	///
1142	/// %0 = tensor.empty(%arg0) : tensor<?x5xf32>
1143	struct ReplaceEmptyTensorStaticShapeDims : OpRewritePattern<EmptyOp> {
1144	using OpRewritePattern<EmptyOp>::OpRewritePattern;
1145
1146	LogicalResult matchAndRewrite(EmptyOp op,
1147	PatternRewriter &rewriter) const override {
1148	SmallVector<Value> foldedDynamicSizes;
1149	RankedTensorType foldedTensorType = foldDynamicToStaticDimSizes(
1150	op.getType(), op.getDynamicSizes(), foldedDynamicSizes);
1151
1152	// Stop here if no dynamic size was promoted to static.
1153	if (foldedTensorType == op.getType())
1154	return failure();
1155
1156	auto newOp = rewriter.create<EmptyOp>(op.getLoc(), foldedTensorType,
1157	foldedDynamicSizes);
1158	rewriter.replaceOpWithNewOp<tensor::CastOp>(op, op.getType(), newOp);
1159	return success();
1160	}
1161	};
1162
1163	struct FoldEmptyTensorWithDimOp : public OpRewritePattern<DimOp> {
1164	using OpRewritePattern<DimOp>::OpRewritePattern;
1165
1166	LogicalResult matchAndRewrite(tensor::DimOp dimOp,
1167	PatternRewriter &rewriter) const override {
1168	std::optional<int64_t> maybeConstantIndex = dimOp.getConstantIndex();
1169	auto emptyTensorOp = dimOp.getSource().getDefiningOp<EmptyOp>();
1170	if (!emptyTensorOp \|\| !maybeConstantIndex)
1171	return failure();
1172	auto emptyTensorType = emptyTensorOp.getType();
1173	if (*maybeConstantIndex < `0` \|\|
1174	*maybeConstantIndex >= emptyTensorType.getRank() \|\|
1175	!emptyTensorType.isDynamicDim(*maybeConstantIndex))
1176	return failure();
1177	rewriter.replaceOp(dimOp,
1178	emptyTensorOp.getDynamicSize(*maybeConstantIndex));
1179	return success();
1180	}
1181	};
1182
1183	/// Canonicalize
1184	///
1185	/// ```mlir
1186	/// %0 = tensor.empty(%d0, %d1) : tensor<?x?xf32>
1187	/// %1 = tensor.cast %0 : tensor<?x?xf32> to tensor<4x?xf32>
1188	/// ```
1189	///
1190	/// into
1191	///
1192	/// ```mlir
1193	/// %0 = tensor.empty(%d1) : tensor<4x?xf32>
1194	/// ```
1195	///
1196	/// This assumes the input program is correct in terms of its shape. So it is
1197	/// safe to assume that `%d0` is in fact 4.
1198	struct FoldEmptyTensorWithCastOp : public OpRewritePattern<CastOp> {
1199	using OpRewritePattern<CastOp>::OpRewritePattern;
1200
1201	LogicalResult matchAndRewrite(CastOp castOp,
1202	PatternRewriter &rewriter) const override {
1203	if (!canFoldIntoProducerOp(castOp))
1204	return failure();
1205	auto producer = castOp.getSource().getDefiningOp<EmptyOp>();
1206	if (!producer)
1207	return failure();
1208
1209	auto resultType =
1210	llvm::cast<RankedTensorType>(castOp->getResult(`0`).getType());
1211	ArrayRef<int64_t> resultShape = resultType.getShape();
1212	SmallVector<OpFoldResult> currMixedSizes = producer.getMixedSizes();
1213	SmallVector<OpFoldResult> newMixedSizes;
1214	newMixedSizes.reserve(N: currMixedSizes.size());
1215	assert(resultShape.size() == currMixedSizes.size() &&
1216	"mismatch in result shape and sizes of empty op");
1217	for (auto it : llvm::zip(resultShape, currMixedSizes)) {
1218	int64_t newDim = std::get<`0`>(it);
1219	OpFoldResult currDim = std::get<`1`>(it);
1220	// Case 1: The empty tensor dim is static. Check that the tensor cast
1221	// result dim matches.
1222	if (auto attr = llvm::dyn_cast_if_present<Attribute>(currDim)) {
1223	if (ShapedType::isDynamic(newDim) \|\|
1224	newDim != llvm::cast<IntegerAttr>(attr).getInt()) {
1225	// Something is off, the cast result shape cannot be more dynamic
1226	// than the empty tensor result shape (enforced by
1227	// `canFoldIntoProducer`). Abort for now.
1228	return rewriter.notifyMatchFailure(
1229	producer, "mismatch in static value of shape of empty tensor "
1230	"result and cast result");
1231	}
1232	newMixedSizes.push_back(attr);
1233	continue;
1234	}
1235
1236	// Case 2 : The tensor cast shape is static, but empty tensor result
1237	// shape is dynamic.
1238	if (!ShapedType::isDynamic(newDim)) {
1239	newMixedSizes.push_back(rewriter.getIndexAttr(newDim));
1240	continue;
1241	}
1242
1243	// Case 3 : The tensor cast shape is dynamic and empty tensor result
1244	// shape is dynamic. Use the dynamic value from the empty tensor op.
1245	newMixedSizes.push_back(currDim);
1246	}
1247
1248	// TODO: Do not drop tensor encoding.
1249	rewriter.replaceOpWithNewOp<EmptyOp>(castOp, newMixedSizes,
1250	resultType.getElementType());
1251	return success();
1252	}
1253	};
1254
1255	} // namespace
1256
1257	void EmptyOp::getCanonicalizationPatterns(RewritePatternSet &results,
1258	MLIRContext *context) {
1259	results.add<FoldEmptyTensorWithCastOp, FoldEmptyTensorWithDimOp,
1260	ReplaceEmptyTensorStaticShapeDims>(context);
1261	}
1262
1263	//===----------------------------------------------------------------------===//
1264	// ExtractOp
1265	//===----------------------------------------------------------------------===//
1266
1267	namespace {
1268
1269	/// Canonicalizes the pattern of the form
1270	///
1271	/// %val = tensor.cast %source : : tensor<?xi32> to tensor<2xi32>
1272	/// %extracted_element = tensor.extract %val[%c0] : tensor<2xi32>
1273	///
1274	/// to
1275	///
1276	/// %extracted_element = tensor.extract %source[%c0] : tensor<?xi32>
1277	struct ExtractFromTensorCast : public OpRewritePattern<tensor::ExtractOp> {
1278	using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
1279
1280	LogicalResult matchAndRewrite(tensor::ExtractOp extract,
1281	PatternRewriter &rewriter) const final {
1282	auto tensorCast = extract.getTensor().getDefiningOp<tensor::CastOp>();
1283	if (!tensorCast)
1284	return failure();
1285	if (!llvm::isa<RankedTensorType>(tensorCast.getSource().getType()))
1286	return failure();
1287	rewriter.replaceOpWithNewOp<tensor::ExtractOp>(
1288	extract, tensorCast.getSource(), extract.getIndices());
1289	return success();
1290	}
1291	};
1292
1293	/// Canonicalizes the pattern of the form
1294	///
1295	/// %val = tensor.collapse_shape %src[[0, 1]] : tensor<3x4xf64> into
1296	/// tensor<12xf64>
1297	/// %extracted_element = tensor.extract %val[%c10] :
1298	/// tensor<12xf64>
1299	///
1300	/// to
1301	///
1302	/// %extracted_element = tensor.extract %src[%c2, %c2] : tensor<3x4xf64>
1303	struct ExtractFromCollapseShape : public OpRewritePattern<tensor::ExtractOp> {
1304	using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
1305
1306	LogicalResult matchAndRewrite(tensor::ExtractOp extractOp,
1307	PatternRewriter &rewriter) const final {
1308	auto collapseOp =
1309	extractOp.getTensor().getDefiningOp<tensor::CollapseShapeOp>();
1310	if (!collapseOp)
1311	return failure();
1312	if (!collapseOp.getSrcType().hasStaticShape())
1313	return failure();
1314
1315	auto sourceSizes = collapseOp.getSrcType().getShape();
1316
1317	SmallVector<Value> indices(extractOp.getIndices().begin(),
1318	extractOp.getIndices().end());
1319	SmallVector<Value> sourceIndices;
1320	for (auto [index, group] :
1321	llvm::zip(indices, collapseOp.getReassociationIndices())) {
1322	assert(!group.empty() && "association indices groups cannot be empty");
1323	auto groupSize = group.size();
1324
1325	if (groupSize == `1`) {
1326	sourceIndices.push_back(index);
1327	continue;
1328	}
1329
1330	SmallVector<int64_t> basis =
1331	llvm::map_to_vector(group, [&](int64_t d) { return sourceSizes[d]; });
1332	auto delinearize = rewriter.create<affine::AffineDelinearizeIndexOp>(
1333	extractOp.getLoc(), index, basis, /hasOuterBound=/true);
1334	llvm::append_range(sourceIndices, delinearize.getResults());
1335	}
1336	if (collapseOp.getReassociationIndices().empty()) {
1337	auto zeroAffineMap = rewriter.getConstantAffineMap(val: `0`);
1338	int64_t srcRank =
1339	cast<RankedTensorType>(collapseOp.getSrcType()).getRank();
1340	OpFoldResult ofr = affine::makeComposedFoldedAffineApply(
1341	rewriter, extractOp.getLoc(), zeroAffineMap,
1342	ArrayRef<OpFoldResult>{});
1343	for (int64_t i = `0`; i < srcRank; i++) {
1344	sourceIndices.push_back(
1345	Elt: getValueOrCreateConstantIndexOp(rewriter, extractOp.getLoc(), ofr));
1346	}
1347	}
1348
1349	rewriter.replaceOpWithNewOp<tensor::ExtractOp>(
1350	extractOp, collapseOp.getSrc(), sourceIndices);
1351	return success();
1352	}
1353	};
1354
1355	} // namespace
1356
1357	void ExtractOp::getAsmResultNames(
1358	function_ref<void(Value, StringRef)> setNameFn) {
1359	setNameFn(getResult(), "extracted");
1360	}
1361
1362	LogicalResult ExtractOp::verify() {
1363	// Verify the # indices match if we have a ranked type.
1364	auto tensorType = llvm::cast<RankedTensorType>(getTensor().getType());
1365	if (tensorType.getRank() != static_cast<int64_t>(getIndices().size()))
1366	return emitOpError("incorrect number of indices for extract_element");
1367	return success();
1368	}
1369
1370	/// If we have an ExtractOp consuming an InsertOp with the same
1371	/// indices, we can return the InsertOp's scalar directly.
1372	// TODO: This only checks the immediate producer; extend to go up the
1373	// insert/extract chain if the slices are disjoint.
1374	static Value foldExtractAfterInsert(ExtractOp extractOp) {
1375	auto insertOp = extractOp.getTensor().getDefiningOp<InsertOp>();
1376
1377	auto isSame = [](Value a, Value b) {
1378	return getAsOpFoldResult(val: a) == getAsOpFoldResult(val: b);
1379	};
1380	if (insertOp && insertOp.getScalar().getType() == extractOp.getType() &&
1381	llvm::equal(insertOp.getIndices(), extractOp.getIndices(), isSame))
1382	return insertOp.getScalar();
1383
1384	return {};
1385	}
1386
1387	OpFoldResult ExtractOp::fold(FoldAdaptor adaptor) {
1388	if (Attribute tensor = adaptor.getTensor()) {
1389	// If this is a splat elements attribute, simply return the value.
1390	// All of the elements of a splat attribute are the same.
1391	if (auto splatTensor = llvm::dyn_cast<SplatElementsAttr>(tensor))
1392	return splatTensor.getSplatValue<Attribute>();
1393
1394	// If this is a dense resource elements attribute, return.
1395	if (isa<DenseResourceElementsAttr>(tensor))
1396	return {};
1397	}
1398
1399	// Collect the constant indices into the tensor.
1400	SmallVector<uint64_t, `8`> indices;
1401	for (Attribute indice : adaptor.getIndices()) {
1402	if (!indice \|\| !llvm::isa<IntegerAttr>(indice))
1403	return {};
1404	indices.push_back(llvm::cast<IntegerAttr>(indice).getInt());
1405	}
1406
1407	// Fold extract(from_elements(...)).
1408	if (auto fromElementsOp = getTensor().getDefiningOp<FromElementsOp>()) {
1409	auto tensorType = llvm::cast<RankedTensorType>(fromElementsOp.getType());
1410	auto rank = tensorType.getRank();
1411	assert(static_cast<int64_t>(indices.size()) == tensorType.getRank() &&
1412	"rank mismatch");
1413	int flatIndex = `0`;
1414	int stride = `1`;
1415	for (int i = rank - `1`; i >= `0`; --i) {
1416	flatIndex += indices[i] * stride;
1417	stride *= tensorType.getDimSize(i);
1418	}
1419	// Prevent out of bounds accesses. This can happen in invalid code that
1420	// will never execute.
1421	if (static_cast<int>(fromElementsOp.getElements().size()) <= flatIndex \|\|
1422	flatIndex < `0`)
1423	return {};
1424	return fromElementsOp.getElements()[flatIndex];
1425	}
1426
1427	// If this is an elements attribute, query the value at the given indices.
1428	if (Attribute tensor = adaptor.getTensor()) {
1429	auto elementsAttr = llvm::dyn_cast<ElementsAttr>(tensor);
1430	if (elementsAttr && elementsAttr.isValidIndex(indices))
1431	return elementsAttr.getValues<Attribute>()[indices];
1432	}
1433
1434	if (Value result = foldExtractAfterInsert(*this))
1435	return result;
1436
1437	return {};
1438	}
1439
1440	void ExtractOp::getCanonicalizationPatterns(RewritePatternSet &results,
1441	MLIRContext *context) {
1442	results.add<ExtractFromTensorCast>(context);
1443	}
1444
1445	void mlir::tensor::populateFoldCollapseExtractPatterns(
1446	RewritePatternSet &patterns) {
1447	patterns.add<ExtractFromCollapseShape>(arg: patterns.getContext());
1448	}
1449
1450	//===----------------------------------------------------------------------===//
1451	// FromElementsOp
1452	//===----------------------------------------------------------------------===//
1453
1454	void FromElementsOp::getAsmResultNames(
1455	function_ref<void(Value, StringRef)> setNameFn) {
1456	setNameFn(getResult(), "from_elements");
1457	}
1458
1459	void FromElementsOp::build(OpBuilder &builder, OperationState &result,
1460	ValueRange elements) {
1461	assert(!elements.empty() && "expected at least one element");
1462	Type resultType = RankedTensorType::get(
1463	{static_cast<int64_t>(elements.size())}, elements.front().getType());
1464	build(builder, result, resultType, elements);
1465	}
1466
1467	OpFoldResult FromElementsOp::fold(FoldAdaptor adaptor) {
1468	if (!llvm::is_contained(adaptor.getElements(), nullptr))
1469	return DenseElementsAttr::get(getType(), adaptor.getElements());
1470	return {};
1471	}
1472
1473	namespace {
1474
1475	// Pushes the index_casts that occur before extractions to after the extract.
1476	// This minimizes type conversion in some cases and enables the extract
1477	// canonicalizer. This changes:
1478	//
1479	// %cast = arith.index_cast %tensor : tensor<1xi32> to tensor<1xindex>
1480	// %extract = tensor.extract %cast[%index] : tensor<1xindex>
1481	//
1482	// to the following:
1483	//
1484	// %extract = tensor.extract %tensor[%index] : tensor<1xindex>
1485	// %cast = arith.index_cast %extract : i32 to index
1486	//
1487	// to just %element.
1488	//
1489	// Consider expanding this to a template and handle all tensor cast
1490	// operations.
1491	struct ExtractElementFromIndexCast
1492	: public OpRewritePattern<tensor::ExtractOp> {
1493	using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
1494
1495	LogicalResult matchAndRewrite(tensor::ExtractOp extract,
1496	PatternRewriter &rewriter) const final {
1497	Location loc = extract.getLoc();
1498	auto indexCast = extract.getTensor().getDefiningOp<arith::IndexCastOp>();
1499	if (!indexCast)
1500	return failure();
1501
1502	Type elementTy = getElementTypeOrSelf(indexCast.getIn());
1503
1504	auto newExtract = rewriter.create<tensor::ExtractOp>(
1505	loc, elementTy, indexCast.getIn(), extract.getIndices());
1506
1507	rewriter.replaceOpWithNewOp<arith::IndexCastOp>(extract, extract.getType(),
1508	newExtract);
1509
1510	return success();
1511	}
1512	};
1513
1514	} // namespace
1515
1516	void FromElementsOp::getCanonicalizationPatterns(RewritePatternSet &results,
1517	MLIRContext *context) {
1518	results.add<ExtractElementFromIndexCast>(context);
1519	}
1520
1521	//===----------------------------------------------------------------------===//
1522	// GatherOp
1523	//===----------------------------------------------------------------------===//
1524
1525	void GatherOp::getAsmResultNames(
1526	function_ref<void(Value, StringRef)> setNameFn) {
1527	setNameFn(getResult(), "gather");
1528	}
1529
1530	/// Return the inferred result type for a gatherOp where:
1531	/// - sourceType is the type of the source tensor gathered from
1532	/// - indicesType is the type of the indices used to gather
1533	/// - gatherDims are the dims along which the gather occurs.
1534	/// Return a full rank or ranked-reduced variant of the type depending on
1535	/// the value of rankReduced.
1536	///
1537	/// The leading dimensions of the index tensor give the result tensor its
1538	/// leading dimensions.
1539	/// The trailing dimensions of the result tensor are obtained from the source
1540	/// tensor by setting the dimensions specified in gather_dims to `1` (if
1541	/// rankedReduced is false), or skipping them (otherwise).
1542	RankedTensorType GatherOp::inferResultType(RankedTensorType sourceType,
1543	RankedTensorType indicesType,
1544	ArrayRef<int64_t> gatherDims,
1545	bool rankReduced) {
1546	SmallVector<int64_t> resultShape(indicesType.getShape().drop_back());
1547	resultShape.reserve(resultShape.size() + sourceType.getRank());
1548	for (int64_t idx : llvm::seq<int64_t>(`0`, sourceType.getRank())) {
1549	if (llvm::binary_search(gatherDims, idx)) {
1550	if (!rankReduced)
1551	resultShape.push_back(`1`);
1552	continue;
1553	}
1554	resultShape.push_back(sourceType.getDimSize(idx));
1555	}
1556	return RankedTensorType::Builder(sourceType).setShape(resultShape);
1557	}
1558
1559	static LogicalResult
1560	verifyGatherOrScatterDims(Operation *op, ArrayRef<int64_t> dims,
1561	ArrayRef<int64_t> indices, int64_t rank,
1562	StringRef gatherOrScatter, StringRef sourceOrDest) {
1563	if (dims.empty())
1564	return op->emitOpError(message: gatherOrScatter) << "_dims must be non-empty";
1565
1566	int64_t numGatherDims = dims.size();
1567	if (numGatherDims > rank)
1568	return op->emitOpError(message: gatherOrScatter)
1569	<< "_dims overflow " << sourceOrDest << " rank";
1570	if (indices.empty() \|\| indices.back() != numGatherDims)
1571	return op->emitOpError(message: gatherOrScatter)
1572	<< "_dims length must match the size of last dimension of indices";
1573	for (int64_t val : dims) {
1574	if (val < `0`)
1575	return op->emitOpError(message: gatherOrScatter)
1576	<< "_dims value must be non-negative";
1577	if (val >= rank)
1578	return op->emitOpError(message: gatherOrScatter)
1579	<< "_dims value must be smaller than " << sourceOrDest << " rank";
1580	}
1581	for (int64_t i = `1`; i < numGatherDims; ++i) {
1582	if (dims [i - `1`] >= dims [i])
1583	return op->emitOpError(message: gatherOrScatter)
1584	<< "_dims values must be strictly increasing";
1585	}
1586	return success();
1587	}
1588
1589	LogicalResult GatherOp::verify() {
1590	int64_t sourceRank = getSourceType().getRank();
1591	ArrayRef<int64_t> gatherDims = getGatherDims();
1592	if (failed(verifyGatherOrScatterDims(getOperation(), gatherDims,
1593	getIndicesType().getShape(), sourceRank,
1594	"gather", "source")))
1595	return failure();
1596
1597	RankedTensorType expectedResultType = GatherOp::inferResultType(
1598	getSourceType(), getIndicesType(), gatherDims, /rankReduced=/false);
1599	RankedTensorType expectedRankReducedResultType = GatherOp::inferResultType(
1600	getSourceType(), getIndicesType(), gatherDims, /rankReduced=/true);
1601	if (getResultType() != expectedResultType &&
1602	getResultType() != expectedRankReducedResultType) {
1603	return emitOpError("result type "
1604	"mismatch: "
1605	"expected ")
1606	<< expectedResultType << " or its rank-reduced variant "
1607	<< expectedRankReducedResultType << " (got: " << getResultType()
1608	<< ")";
1609	}
1610
1611	return success();
1612	}
1613
1614	OpFoldResult GatherOp::fold(FoldAdaptor adaptor) {
1615	if (OpFoldResult reshapedSource = reshapeConstantSource(
1616	llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
1617	getResult().getType()))
1618	return reshapedSource;
1619	return {};
1620	}
1621
1622	//===----------------------------------------------------------------------===//
1623	// InsertOp
1624	//===----------------------------------------------------------------------===//
1625
1626	void InsertOp::getAsmResultNames(
1627	function_ref<void(Value, StringRef)> setNameFn) {
1628	setNameFn(getResult(), "inserted");
1629	}
1630
1631	LogicalResult InsertOp::verify() {
1632	// Verify the # indices match if we have a ranked type.
1633	auto destType = llvm::cast<RankedTensorType>(getDest().getType());
1634	if (destType.getRank() != static_cast<int64_t>(getIndices().size()))
1635	return emitOpError("incorrect number of indices");
1636	return success();
1637	}
1638
1639	OpFoldResult InsertOp::fold(FoldAdaptor adaptor) {
1640	Attribute scalar = adaptor.getScalar();
1641	Attribute dest = adaptor.getDest();
1642	if (scalar && dest)
1643	if (auto splatDest = llvm::dyn_cast<SplatElementsAttr>(dest))
1644	if (scalar == splatDest.getSplatValue<Attribute>())
1645	return dest;
1646	return {};
1647	}
1648
1649	//===----------------------------------------------------------------------===//
1650	// GenerateOp
1651	//===----------------------------------------------------------------------===//
1652
1653	void GenerateOp::getAsmResultNames(
1654	function_ref<void(Value, StringRef)> setNameFn) {
1655	setNameFn(getResult(), "generated");
1656	}
1657
1658	LogicalResult GenerateOp::reifyResultShapes(
1659	OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
1660	reifiedReturnShapes.resize(`1`, SmallVector<OpFoldResult>(getType().getRank()));
1661	int idx = `0`;
1662	for (auto dim : llvm::seq<int64_t>(`0`, getType().getRank())) {
1663	if (getType().isDynamicDim(dim)) {
1664	reifiedReturnShapes[`0`][dim] = getOperand(idx++);
1665	} else {
1666	reifiedReturnShapes[`0`][dim] =
1667	builder.getIndexAttr(getType().getDimSize(dim));
1668	}
1669	}
1670	return success();
1671	}
1672
1673	LogicalResult GenerateOp::verify() {
1674	// Ensure that the tensor type has as many dynamic dimensions as are
1675	// specified by the operands.
1676	RankedTensorType resultType = llvm::cast<RankedTensorType>(getType());
1677	if (getNumOperands() != resultType.getNumDynamicDims())
1678	return emitError("must have as many index operands as dynamic extents "
1679	"in the result type");
1680	return success();
1681	}
1682
1683	LogicalResult GenerateOp::verifyRegions() {
1684	RankedTensorType resultTy = llvm::cast<RankedTensorType>(getType());
1685	// Ensure that region arguments span the index space.
1686	if (!llvm::all_of(getBody().getArgumentTypes(),
1687	[](Type ty) { return ty.isIndex(); }))
1688	return emitError("all body arguments must be index");
1689	if (getBody().getNumArguments() != resultTy.getRank())
1690	return emitError("must have one body argument per input dimension");
1691
1692	// Ensure that the region yields an element of the right type.
1693	auto yieldOp = cast<YieldOp>(getBody().getBlocks().front().getTerminator());
1694
1695	if (yieldOp.getValue().getType() != resultTy.getElementType())
1696	return emitOpError(
1697	"body must be terminated with a `yield` operation of the tensor "
1698	"element type");
1699
1700	return success();
1701	}
1702
1703	void GenerateOp::build(
1704	OpBuilder &b, OperationState &result, Type resultTy,
1705	ValueRange dynamicExtents,
1706	function_ref<void(OpBuilder &, Location, ValueRange)> bodyBuilder) {
1707	build(b, result, resultTy, dynamicExtents);
1708
1709	// Build and populate body.
1710	OpBuilder::InsertionGuard guard(b);
1711	Region *bodyRegion = result.regions.front().get();
1712	auto rank = llvm::cast<RankedTensorType>(resultTy).getRank();
1713	SmallVector<Type, `2`> argumentTypes(rank, b.getIndexType());
1714	SmallVector<Location, `2`> argumentLocs(rank, result.location);
1715	Block *bodyBlock =
1716	b.createBlock(bodyRegion, bodyRegion->end(), argumentTypes, argumentLocs);
1717	bodyBuilder(b, result.location, bodyBlock->getArguments());
1718	}
1719
1720	namespace {
1721
1722	/// Canonicalizes tensor.generate operations with a constant
1723	/// operand into the equivalent operation with the operand expressed in the
1724	/// result type, instead. We also insert a type cast to make sure that the
1725	/// resulting IR is still well-typed.
1726	struct StaticTensorGenerate : public OpRewritePattern<GenerateOp> {
1727	using OpRewritePattern<GenerateOp>::OpRewritePattern;
1728
1729	LogicalResult matchAndRewrite(GenerateOp generateOp,
1730	PatternRewriter &rewriter) const final {
1731	SmallVector<Value> foldedDynamicSizes;
1732	RankedTensorType foldedTensorType = foldDynamicToStaticDimSizes(
1733	generateOp.getType(), generateOp.getDynamicExtents(),
1734	foldedDynamicSizes);
1735
1736	// Stop here if no dynamic size was promoted to static.
1737	if (foldedTensorType == generateOp.getType())
1738	return failure();
1739
1740	auto loc = generateOp.getLoc();
1741	auto newOp =
1742	rewriter.create<GenerateOp>(loc, foldedTensorType, foldedDynamicSizes);
1743	rewriter.inlineRegionBefore(generateOp.getBody(), newOp.getBody(),
1744	newOp.getBody().begin());
1745	rewriter.replaceOpWithNewOp<tensor::CastOp>(generateOp,
1746	generateOp.getType(), newOp);
1747	return success();
1748	}
1749	};
1750
1751	/// Canonicalizes the pattern of the form
1752	///
1753	/// %tensor = tensor.generate %x {
1754	/// ^bb0(%arg0: index):
1755	/// <computation>
1756	/// yield %1 : index
1757	/// } : tensor<?xindex>
1758	/// %extracted_element = tensor.extract %tensor[%c0] : tensor<?xi32>
1759	///
1760	/// to just <computation> with %arg0 replaced by %c0. We only do this if the
1761	/// tensor.generate operation has no side-effects.
1762	struct ExtractFromTensorGenerate : public OpRewritePattern<tensor::ExtractOp> {
1763	using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
1764
1765	LogicalResult matchAndRewrite(tensor::ExtractOp extract,
1766	PatternRewriter &rewriter) const final {
1767	auto tensorFromElements = extract.getTensor().getDefiningOp<GenerateOp>();
1768	if (!tensorFromElements \|\| !wouldOpBeTriviallyDead(tensorFromElements))
1769	return failure();
1770
1771	IRMapping mapping;
1772	Block *body = &tensorFromElements.getBody().front();
1773	mapping.map(body->getArguments(), extract.getIndices());
1774	for (auto &op : body->without_terminator())
1775	rewriter.clone(op, mapping);
1776
1777	auto yield = cast<YieldOp>(body->getTerminator());
1778
1779	rewriter.replaceOp(extract, mapping.lookupOrDefault(yield.getValue()));
1780	return success();
1781	}
1782	};
1783
1784	} // namespace
1785
1786	void GenerateOp::getCanonicalizationPatterns(RewritePatternSet &results,
1787	MLIRContext *context) {
1788	// TODO: Move extract pattern to tensor::ExtractOp.
1789	results.add<ExtractFromTensorGenerate, StaticTensorGenerate>(context);
1790	}
1791
1792	//===----------------------------------------------------------------------===//
1793	// RankOp
1794	//===----------------------------------------------------------------------===//
1795
1796	void RankOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
1797	setNameFn(getResult(), "rank");
1798	}
1799
1800	OpFoldResult RankOp::fold(FoldAdaptor adaptor) {
1801	// Constant fold rank when the rank of the operand is known.
1802	auto type = getOperand().getType();
1803	auto shapedType = llvm::dyn_cast<ShapedType>(type);
1804	if (shapedType && shapedType.hasRank())
1805	return IntegerAttr::get(IndexType::get(getContext()), shapedType.getRank());
1806	return IntegerAttr();
1807	}
1808
1809	//===----------------------------------------------------------------------===//
1810	// ReshapeOp
1811	//===----------------------------------------------------------------------===//
1812
1813	void ReshapeOp::getAsmResultNames(
1814	function_ref<void(Value, StringRef)> setNameFn) {
1815	setNameFn(getResult(), "reshape");
1816	}
1817
1818	static int64_t getNumElements(ShapedType type) {
1819	int64_t numElements = `1`;
1820	for (auto dim : type.getShape())
1821	numElements *= dim;
1822	return numElements;
1823	}
1824
1825	LogicalResult ReshapeOp::verify() {
1826	TensorType operandType = llvm::cast<TensorType>(getSource().getType());
1827	TensorType resultType = llvm::cast<TensorType>(getResult().getType());
1828
1829	if (operandType.getElementType() != resultType.getElementType())
1830	return emitOpError("element types of source and destination tensor "
1831	"types should be the same");
1832
1833	int64_t shapeSize =
1834	llvm::cast<RankedTensorType>(getShape().getType()).getDimSize(`0`);
1835	auto resultRankedType = llvm::dyn_cast<RankedTensorType>(resultType);
1836	auto operandRankedType = llvm::dyn_cast<RankedTensorType>(operandType);
1837
1838	if (resultRankedType) {
1839	if (operandRankedType && resultRankedType.hasStaticShape() &&
1840	operandRankedType.hasStaticShape()) {
1841	if (getNumElements(operandRankedType) != getNumElements(resultRankedType))
1842	return emitOpError("source and destination tensor should have the "
1843	"same number of elements");
1844	}
1845	if (ShapedType::isDynamic(shapeSize))
1846	return emitOpError("cannot use shape operand with dynamic length to "
1847	"reshape to statically-ranked tensor type");
1848	if (shapeSize != resultRankedType.getRank())
1849	return emitOpError(
1850	"length of shape operand differs from the result's tensor rank");
1851	}
1852	return success();
1853	}
1854
1855	OpFoldResult ReshapeOp::fold(FoldAdaptor adaptor) {
1856	if (OpFoldResult reshapedSource = reshapeConstantSource(
1857	llvm::dyn_cast_if_present<DenseElementsAttr>(adaptor.getSource()),
1858	getResult().getType()))
1859	return reshapedSource;
1860
1861	// If the producer of operand 'source' is another 'tensor.reshape' op, use the
1862	// producer's input instead as the original tensor to reshape. This could
1863	// render such producer dead code.
1864	if (auto reshapeOpProducer = getSource().getDefiningOp<ReshapeOp>()) {
1865	getSourceMutable().assign(reshapeOpProducer.getSource());
1866	return getResult();
1867	}
1868
1869	auto source = getSource();
1870	auto sourceTy = dyn_cast<RankedTensorType>(source.getType());
1871	auto resultTy = dyn_cast<RankedTensorType>(getType());
1872	if (!sourceTy \|\| !resultTy \|\| sourceTy != resultTy)
1873	return {};
1874
1875	// If the source and result are both 1D tensors and have the same type, the
1876	// reshape has no effect, even if the tensor is dynamically shaped.
1877	if (sourceTy.getRank() == `1`)
1878	return source;
1879
1880	if (auto fromElements = getShape().getDefiningOp<tensor::FromElementsOp>()) {
1881	auto elements = fromElements.getElements();
1882	bool dynamicNoop =
1883	sourceTy.getRank() == static_cast<int64_t>(elements.size());
1884	for (int id = `0`, s = elements.size(); id < s && dynamicNoop; ++id) {
1885	auto element = elements[id];
1886
1887	if (auto cst = getConstantIntValue(element)) {
1888	dynamicNoop &= cst.value() == sourceTy.getDimSize(id);
1889	continue;
1890	}
1891
1892	if (auto dimOp = element.getDefiningOp<tensor::DimOp>()) {
1893	dynamicNoop &= dimOp.getSource() == source;
1894
1895	auto cst = getConstantIntValue(dimOp.getIndex());
1896	dynamicNoop &=
1897	cst.has_value() && cst.value() == static_cast<int64_t>(id);
1898	continue;
1899	}
1900
1901	dynamicNoop = false;
1902	break;
1903	}
1904
1905	if (dynamicNoop)
1906	return source;
1907	}
1908
1909	return {};
1910	}
1911
1912	//===----------------------------------------------------------------------===//
1913	// Reassociative reshape ops
1914	//===----------------------------------------------------------------------===//
1915
1916	void CollapseShapeOp::getAsmResultNames(
1917	function_ref<void(Value, StringRef)> setNameFn) {
1918	setNameFn(getResult(), "collapsed");
1919	}
1920
1921	void ExpandShapeOp::getAsmResultNames(
1922	function_ref<void(Value, StringRef)> setNameFn) {
1923	setNameFn(getResult(), "expanded");
1924	}
1925
1926	int64_t ExpandShapeOp::getCorrespondingSourceDim(int64_t resultDim) {
1927	assert(resultDim >= `0` && resultDim < getResultType().getRank() &&
1928	"invalid resultDim");
1929	for (const auto &it : llvm::enumerate(getReassociationIndices()))
1930	if (llvm::is_contained(it.value(), resultDim))
1931	return it.index();
1932	llvm_unreachable("could not find reassociation group");
1933	}
1934
1935	FailureOr<SmallVector<OpFoldResult>>
1936	ExpandShapeOp::inferOutputShape(OpBuilder &b, Location loc,
1937	RankedTensorType expandedType,
1938	ArrayRef<ReassociationIndices> reassociation,
1939	ArrayRef<OpFoldResult> inputShape) {
1940	std::optional<SmallVector<OpFoldResult>> outputShape =
1941	inferExpandShapeOutputShape(b, loc, expandedType, reassociation,
1942	inputShape);
1943	if (!outputShape)
1944	return failure();
1945	return *outputShape;
1946	}
1947
1948	SmallVector<OpFoldResult> ExpandShapeOp::getMixedOutputShape() {
1949	return getMixedValues(getStaticOutputShape(), getOutputShape(), getContext());
1950	}
1951
1952	void ExpandShapeOp::build(OpBuilder &builder, OperationState &result,
1953	Type resultType, Value src,
1954	ArrayRef<ReassociationIndices> reassociation,
1955	ArrayRef<OpFoldResult> outputShape) {
1956	auto [staticOutputShape, dynamicOutputShape] =
1957	decomposeMixedValues(SmallVector<OpFoldResult>(outputShape));
1958	build(builder, result, cast<RankedTensorType>(resultType), src,
1959	getReassociationIndicesAttribute(builder, reassociation),
1960	dynamicOutputShape, staticOutputShape);
1961	}
1962
1963	void ExpandShapeOp::build(OpBuilder &builder, OperationState &result,
1964	Type resultType, Value src,
1965	ArrayRef<ReassociationIndices> reassociation) {
1966	SmallVector<OpFoldResult> inputShape =
1967	getMixedSizes(builder, result.location, src);
1968	auto tensorResultTy = cast<RankedTensorType>(resultType);
1969	FailureOr<SmallVector<OpFoldResult>> outputShape = inferOutputShape(
1970	builder, result.location, tensorResultTy, reassociation, inputShape);
1971	SmallVector<OpFoldResult> outputShapeOrEmpty;
1972	if (succeeded(outputShape)) {
1973	outputShapeOrEmpty = *outputShape;
1974	}
1975	build(builder, result, tensorResultTy, src, reassociation,
1976	outputShapeOrEmpty);
1977	}
1978
1979	SmallVector<AffineMap, `4`> CollapseShapeOp::getReassociationMaps() {
1980	return getSymbolLessAffineMaps(getReassociationExprs());
1981	}
1982	SmallVector<ReassociationExprs, `4`> CollapseShapeOp::getReassociationExprs() {
1983	return convertReassociationIndicesToExprs(getContext(),
1984	getReassociationIndices());
1985	}
1986
1987	SmallVector<AffineMap, `4`> ExpandShapeOp::getReassociationMaps() {
1988	return getSymbolLessAffineMaps(getReassociationExprs());
1989	}
1990	SmallVector<ReassociationExprs, `4`> ExpandShapeOp::getReassociationExprs() {
1991	return convertReassociationIndicesToExprs(getContext(),
1992	getReassociationIndices());
1993	}
1994
1995	RankedTensorType CollapseShapeOp::inferCollapsedType(
1996	RankedTensorType type, SmallVector<ReassociationIndices> reassociation) {
1997	return inferCollapsedType(
1998	type, getSymbolLessAffineMaps(convertReassociationIndicesToExprs(
1999	type.getContext(), reassociation)));
2000	}
2001
2002	/// Compute the RankedTensorType obtained by applying `reassociation` to
2003	/// `type`.
2004	RankedTensorType
2005	CollapseShapeOp::inferCollapsedType(RankedTensorType type,
2006	ArrayRef<AffineMap> reassociation) {
2007	auto shape = type.getShape();
2008	SmallVector<int64_t, `4`> newShape;
2009	newShape.reserve(reassociation.size());
2010
2011	// Use the fact that reassociation is valid to simplify the logic: only use
2012	// each map's rank.
2013	assert(isReassociationValid(reassociation) && "invalid reassociation");
2014	unsigned currentDim = `0`;
2015	for (AffineMap m : reassociation) {
2016	unsigned dim = m.getNumResults();
2017	auto band = shape.slice(currentDim, dim);
2018	int64_t size = `1`;
2019	if (llvm::is_contained(band, ShapedType::kDynamic))
2020	size = ShapedType::kDynamic;
2021	else
2022	for (unsigned d = `0`; d < dim; ++d)
2023	size *= shape[currentDim + d];
2024	newShape.push_back(size);
2025	currentDim += dim;
2026	}
2027
2028	return RankedTensorType::get(newShape, type.getElementType());
2029	}
2030
2031	void CollapseShapeOp::build(OpBuilder &b, OperationState &result, Value src,
2032	ArrayRef<ReassociationIndices> reassociation,
2033	ArrayRef<NamedAttribute> attrs) {
2034	auto resultType = inferCollapsedType(
2035	llvm::cast<RankedTensorType>(src.getType()),
2036	getSymbolLessAffineMaps(
2037	convertReassociationIndicesToExprs(b.getContext(), reassociation)));
2038	result.addAttribute(getReassociationAttrStrName(),
2039	getReassociationIndicesAttribute(b, reassociation));
2040	build(b, result, resultType, src, attrs);
2041	}
2042
2043	template <typename TensorReshapeOp, bool isExpansion = std::is_same<
2044	TensorReshapeOp, ExpandShapeOp>::value>
2045	static LogicalResult verifyTensorReshapeOp(TensorReshapeOp op,
2046	RankedTensorType expandedType,
2047	RankedTensorType collapsedType) {
2048	if (failed(
2049	verifyReshapeLikeTypes(op, expandedType, collapsedType, isExpansion)))
2050	return failure();
2051
2052	auto maps = op.getReassociationMaps();
2053	RankedTensorType expectedType =
2054	CollapseShapeOp::inferCollapsedType(expandedType, maps);
2055	if (!isSameTypeWithoutEncoding(collapsedType, expectedType))
2056	return op.emitOpError("expected collapsed type to be ")
2057	<< expectedType << ", but got " << collapsedType;
2058	return success();
2059	}
2060
2061	LogicalResult ExpandShapeOp::verify() {
2062	auto srcType = getSrcType();
2063	auto resultType = getResultType();
2064
2065	if ((int64_t)getStaticOutputShape().size() != resultType.getRank())
2066	return emitOpError("expected number of static shape dims to be equal to "
2067	"the output rank (")
2068	<< resultType.getRank() << ") but found "
2069	<< getStaticOutputShape().size() << " inputs instead";
2070
2071	if ((int64_t)getOutputShape().size() !=
2072	llvm::count(getStaticOutputShape(), ShapedType::kDynamic))
2073	return emitOpError("mismatch in dynamic dims in output_shape and "
2074	"static_output_shape: static_output_shape has ")
2075	<< llvm::count(getStaticOutputShape(), ShapedType::kDynamic)
2076	<< " dynamic dims while output_shape has " << getOutputShape().size()
2077	<< " values";
2078
2079	return verifyTensorReshapeOp(*this, resultType, srcType);
2080	}
2081
2082	LogicalResult CollapseShapeOp::verify() {
2083	return verifyTensorReshapeOp(*this, getSrcType(), getResultType());
2084	}
2085
2086	namespace {
2087	/// Reshape of a splat constant can be replaced with a constant of the result
2088	/// type.
2089	template <typename TensorReshapeOp>
2090	struct FoldReshapeWithConstant : OpRewritePattern<TensorReshapeOp> {
2091	using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
2092	LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
2093	PatternRewriter &rewriter) const override {
2094	DenseElementsAttr attr;
2095	if (!matchPattern(reshapeOp.getSrc(), m_Constant(bind_value: &attr)))
2096	return failure();
2097	if (!attr \|\| !attr.isSplat())
2098	return failure();
2099	DenseElementsAttr newAttr = DenseElementsAttr::getFromRawBuffer(
2100	reshapeOp.getResultType(), attr.getRawData());
2101	rewriter.replaceOpWithNewOp<arith::ConstantOp>(reshapeOp, newAttr);
2102	return success();
2103	}
2104	};
2105
2106	// Folds TensorReshapeOp(splat x : src_type) : res_type into splat x : res_type.
2107	template <typename TensorReshapeOp>
2108	class FoldReshapeWithSplat : public OpRewritePattern<TensorReshapeOp> {
2109	public:
2110	using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
2111
2112	LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
2113	PatternRewriter &rewriter) const override {
2114	auto splatOp = reshapeOp.getSrc().template getDefiningOp<tensor::SplatOp>();
2115	if (!splatOp \|\| !splatOp.getAggregate().getType().hasStaticShape())
2116	return failure();
2117
2118	rewriter.replaceOpWithNewOp<tensor::SplatOp>(
2119	reshapeOp, reshapeOp.getResultType(), splatOp.getInput());
2120	return success();
2121	}
2122	};
2123
2124	/// Reshape of a FromElements can be replaced with a FromElements of the
2125	/// result type
2126	template <typename TensorReshapeOp>
2127	struct FoldReshapeWithFromElements : OpRewritePattern<TensorReshapeOp> {
2128	using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
2129	LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
2130	PatternRewriter &rewriter) const override {
2131	auto fromElements =
2132	reshapeOp.getSrc().template getDefiningOp<FromElementsOp>();
2133	if (!fromElements)
2134	return failure();
2135
2136	auto shapedTy = llvm::cast<ShapedType>(reshapeOp.getType());
2137
2138	if (!shapedTy.hasStaticShape())
2139	return failure();
2140
2141	rewriter.replaceOpWithNewOp<FromElementsOp>(reshapeOp, reshapeOp.getType(),
2142	fromElements.getElements());
2143	return success();
2144	}
2145	};
2146
2147	// Fold CastOp into CollapseShapeOp when adding static information.
2148	struct FoldCollapseOfCastOp : public OpRewritePattern<CollapseShapeOp> {
2149	using OpRewritePattern<CollapseShapeOp>::OpRewritePattern;
2150
2151	LogicalResult matchAndRewrite(CollapseShapeOp collapseShapeOp,
2152	PatternRewriter &rewriter) const override {
2153	auto castOp = collapseShapeOp.getSrc().getDefiningOp<tensor::CastOp>();
2154	if (!tensor::canFoldIntoConsumerOp(castOp))
2155	return failure();
2156
2157	RankedTensorType srcType =
2158	llvm::cast<RankedTensorType>(castOp.getSource().getType());
2159	RankedTensorType newResultType = CollapseShapeOp::inferCollapsedType(
2160	srcType, collapseShapeOp.getReassociationMaps());
2161
2162	if (newResultType == collapseShapeOp.getResultType()) {
2163	rewriter.modifyOpInPlace(collapseShapeOp, [&]() {
2164	collapseShapeOp.getSrcMutable().assign(castOp.getSource());
2165	});
2166	} else {
2167	auto newOp = rewriter.create<CollapseShapeOp>(
2168	collapseShapeOp.getLoc(), newResultType, castOp.getSource(),
2169	collapseShapeOp.getReassociation());
2170	rewriter.replaceOpWithNewOp<tensor::CastOp>(
2171	collapseShapeOp, collapseShapeOp.getResultType(), newOp);
2172	}
2173	return success();
2174	}
2175	};
2176
2177	/// Fold/sink a producer `tensor.cast` with a consumer `tensor.expand_shape` by
2178	/// matching constant output_shape operands of the expand. This makes the
2179	/// `tensor.expand_shape` more static and creates a consumer cast that can be
2180	/// propagated further.
2181	struct ConvertToStaticExpandShape : public OpRewritePattern<ExpandShapeOp> {
2182	using OpRewritePattern<ExpandShapeOp>::OpRewritePattern;
2183
2184	LogicalResult matchAndRewrite(ExpandShapeOp expandOp,
2185	PatternRewriter &rewriter) const override {
2186	auto castOp = expandOp.getSrc().getDefiningOp<CastOp>();
2187	if (!canFoldIntoConsumerOp(castOp))
2188	return failure();
2189
2190	ArrayRef<int64_t> castSrcShape = castOp.getSource().getType().getShape();
2191	SmallVector<ReassociationIndices, `4`> reassoc =
2192	expandOp.getReassociationIndices();
2193
2194	SmallVector<int64_t> newOutputShape(expandOp.getResultType().getShape());
2195	SmallVector<Value> dynamicOutputShape;
2196	auto outputIt = expandOp.getOutputShape().begin();
2197
2198	for (const auto &[inputDim, innerReassoc] : llvm::enumerate(reassoc)) {
2199	for (uint64_t outDim : innerReassoc) {
2200	if (!ShapedType::isDynamic(newOutputShape[outDim]))
2201	continue;
2202
2203	// If the cast's src type is dynamic, don't infer any of the
2204	// corresponding expanded dimensions. `tensor.expand_shape` requires at
2205	// least one of the expanded dimensions to be dynamic if the input is
2206	// dynamic.
2207	Value val = *outputIt;
2208	++outputIt;
2209	if (ShapedType::isDynamic(castSrcShape[inputDim])) {
2210	dynamicOutputShape.push_back(val);
2211	continue;
2212	}
2213
2214	APInt cst;
2215	if (matchPattern(val, m_ConstantInt(&cst))) {
2216	newOutputShape[outDim] = cst.getSExtValue();
2217	} else {
2218	dynamicOutputShape.push_back(val);
2219	}
2220	}
2221	}
2222
2223	// Couldn't match any values, nothing to change
2224	if (expandOp.getOutputShape().size() == dynamicOutputShape.size())
2225	return failure();
2226
2227	// Calculate the input shape from the output
2228	SmallVector<int64_t> newInputShape(expandOp.getSrcType().getRank(), `1l`);
2229	for (auto inDim : llvm::seq<int>(`0`, newInputShape.size())) {
2230	for (auto outDim : reassoc[inDim]) {
2231	auto ofr = newOutputShape[outDim];
2232	if (ShapedType::isDynamic(ofr)) {
2233	newInputShape[inDim] = ShapedType::kDynamic;
2234	break;
2235	}
2236	newInputShape[inDim] *= ofr;
2237	}
2238	}
2239
2240	SmallVector<OpFoldResult> outputOfr =
2241	getMixedValues(staticValues: newOutputShape, dynamicValues: dynamicOutputShape, b&: rewriter);
2242	auto inputType = RankedTensorType::get(
2243	newInputShape, expandOp.getSrcType().getElementType());
2244	auto outputType = RankedTensorType::get(
2245	newOutputShape, expandOp.getSrcType().getElementType());
2246	auto inputCast = rewriter.create<CastOp>(expandOp.getLoc(), inputType,
2247	expandOp.getSrc());
2248	auto newExpand = rewriter.create<ExpandShapeOp>(
2249	expandOp.getLoc(), outputType, inputCast.getResult(),
2250	expandOp.getReassociationIndices(), outputOfr);
2251	rewriter.replaceOpWithNewOp<CastOp>(expandOp, expandOp.getType(),
2252	newExpand.getResult());
2253	return success();
2254	}
2255	};
2256	} // namespace
2257
2258	void ExpandShapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
2259	MLIRContext *context) {
2260	results.add<
2261	ComposeReassociativeReshapeOps<ExpandShapeOp, ReshapeOpKind::kExpand>,
2262	ComposeExpandOfCollapseOp<ExpandShapeOp, CollapseShapeOp>,
2263	ConvertToStaticExpandShape, FoldReshapeWithConstant<ExpandShapeOp>,
2264	FoldReshapeWithSplat<ExpandShapeOp>,
2265	FoldReshapeWithFromElements<ExpandShapeOp>>(context);
2266	}
2267
2268	void CollapseShapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
2269	MLIRContext *context) {
2270	results.add<
2271	ComposeReassociativeReshapeOps<CollapseShapeOp, ReshapeOpKind::kCollapse>,
2272	ComposeCollapseOfExpandOp<CollapseShapeOp, ExpandShapeOp, CastOp,
2273	tensor::DimOp, RankedTensorType>,
2274	FoldReshapeWithConstant<CollapseShapeOp>,
2275	FoldReshapeWithSplat<CollapseShapeOp>,
2276	FoldReshapeWithFromElements<CollapseShapeOp>, FoldCollapseOfCastOp>(
2277	context);
2278	}
2279
2280	OpFoldResult ExpandShapeOp::fold(FoldAdaptor adaptor) {
2281	return foldReshapeOp<ExpandShapeOp, CollapseShapeOp>(*this,
2282	adaptor.getOperands());
2283	}
2284
2285	OpFoldResult CollapseShapeOp::fold(FoldAdaptor adaptor) {
2286	return foldReshapeOp<CollapseShapeOp, ExpandShapeOp>(*this,
2287	adaptor.getOperands());
2288	}
2289
2290	//===----------------------------------------------------------------------===//
2291	// ExtractSliceOp
2292	//===----------------------------------------------------------------------===//
2293
2294	void ExtractSliceOp::getAsmResultNames(
2295	function_ref<void(Value, StringRef)> setNameFn) {
2296	setNameFn(getResult(), "extracted_slice");
2297	}
2298
2299	/// An extract_slice result type can be inferred, when it is not
2300	/// rank-reduced, from the source type and the static representation of
2301	/// offsets, sizes and strides. Special sentinels encode the dynamic case.
2302	RankedTensorType ExtractSliceOp::inferResultType(
2303	RankedTensorType sourceTensorType, ArrayRef<int64_t> staticOffsets,
2304	ArrayRef<int64_t> staticSizes, ArrayRef<int64_t> staticStrides) {
2305	// An extract_slice op may specify only a leading subset of offset/sizes/
2306	// strides in which case we complete with offset=0, sizes from memref type
2307	// and strides=1.
2308	assert(static_cast<int64_t>(staticSizes.size()) ==
2309	sourceTensorType.getRank() &&
2310	"unexpected staticSizes not equal to rank of source");
2311	return RankedTensorType::get(staticSizes, sourceTensorType.getElementType(),
2312	sourceTensorType.getEncoding());
2313	}
2314
2315	RankedTensorType ExtractSliceOp::inferResultType(
2316	RankedTensorType sourceTensorType, ArrayRef<OpFoldResult> offsets,
2317	ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides) {
2318	SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
2319	SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
2320	dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
2321	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
2322	dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
2323	return ExtractSliceOp::inferResultType(sourceTensorType, staticOffsets,
2324	staticSizes, staticStrides);
2325	}
2326
2327	/// If the rank is reduced (i.e. the desiredResultRank is smaller than the
2328	/// number of sizes), drop as many size 1 as needed to produce an inferred
2329	/// type with the desired rank.
2330	///
2331	/// Note that there may be multiple ways to compute this rank-reduced type:
2332	/// e.g. 1x6x1 can rank-reduce to either 1x6 or 6x1 2-D tensors.
2333	///
2334	/// To disambiguate, this function always drops the first 1 sizes occurrences.
2335	RankedTensorType ExtractSliceOp::inferCanonicalRankReducedResultType(
2336	unsigned desiredResultRank, RankedTensorType sourceRankedTensorType,
2337	ArrayRef<int64_t> offsets, ArrayRef<int64_t> sizes,
2338	ArrayRef<int64_t> strides) {
2339	// Type inferred in the absence of rank-reducing behavior.
2340	auto inferredType = llvm::cast<RankedTensorType>(
2341	inferResultType(sourceRankedTensorType, offsets, sizes, strides));
2342	int rankDiff = inferredType.getRank() - desiredResultRank;
2343	if (rankDiff > `0`) {
2344	auto shape = inferredType.getShape();
2345	llvm::SmallBitVector dimsToProject =
2346	getPositionsOfShapeOne(rankDiff, shape);
2347	SmallVector<int64_t> projectedShape;
2348	// Best effort rank-reducing: drop 1s in order.
2349	for (unsigned pos = `0`, e = shape.size(); pos < e; ++pos)
2350	if (!dimsToProject.test(pos))
2351	projectedShape.push_back(shape[pos]);
2352	inferredType =
2353	RankedTensorType::get(projectedShape, inferredType.getElementType());
2354	}
2355	return inferredType;
2356	}
2357
2358	RankedTensorType ExtractSliceOp::inferCanonicalRankReducedResultType(
2359	unsigned desiredResultRank, RankedTensorType sourceRankedTensorType,
2360	ArrayRef<OpFoldResult> offsets, ArrayRef<OpFoldResult> sizes,
2361	ArrayRef<OpFoldResult> strides) {
2362	SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
2363	SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
2364	dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
2365	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
2366	dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
2367	return ExtractSliceOp::inferCanonicalRankReducedResultType(
2368	desiredResultRank, sourceRankedTensorType, staticOffsets, staticSizes,
2369	staticStrides);
2370	}
2371
2372	/// Build an ExtractSliceOp with mixed static and dynamic entries and custom
2373	/// result type. If the type passed is nullptr, it is inferred.
2374	void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
2375	RankedTensorType resultType, Value source,
2376	ArrayRef<OpFoldResult> offsets,
2377	ArrayRef<OpFoldResult> sizes,
2378	ArrayRef<OpFoldResult> strides,
2379	ArrayRef<NamedAttribute> attrs) {
2380	SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
2381	SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
2382	dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
2383	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
2384	dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
2385	auto sourceRankedTensorType = llvm::cast<RankedTensorType>(source.getType());
2386	// Structuring implementation this way avoids duplication between builders.
2387	if (!resultType) {
2388	resultType = llvm::cast<RankedTensorType>(ExtractSliceOp::inferResultType(
2389	sourceRankedTensorType, staticOffsets, staticSizes, staticStrides));
2390	}
2391	result.addAttributes(attrs);
2392	build(b, result, resultType, source, dynamicOffsets, dynamicSizes,
2393	dynamicStrides, b.getDenseI64ArrayAttr(staticOffsets),
2394	b.getDenseI64ArrayAttr(staticSizes),
2395	b.getDenseI64ArrayAttr(staticStrides));
2396	}
2397
2398	/// Build an ExtractSliceOp with mixed static and dynamic entries and inferred
2399	/// result type.
2400	void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2401	ArrayRef<OpFoldResult> offsets,
2402	ArrayRef<OpFoldResult> sizes,
2403	ArrayRef<OpFoldResult> strides,
2404	ArrayRef<NamedAttribute> attrs) {
2405	build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
2406	}
2407
2408	/// Build an ExtractSliceOp with mixed static and dynamic entries packed into
2409	/// a Range vector.
2410	void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2411	ArrayRef<Range> ranges,
2412	ArrayRef<NamedAttribute> attrs) {
2413	auto [offsets, sizes, strides] = getOffsetsSizesAndStrides(ranges);
2414	build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
2415	}
2416
2417	/// Build an ExtractSliceOp with dynamic entries and custom result type. If
2418	/// the type passed is nullptr, it is inferred.
2419	void ExtractSliceOp::build(OpBuilder &b, OperationState &result,
2420	RankedTensorType resultType, Value source,
2421	ValueRange offsets, ValueRange sizes,
2422	ValueRange strides, ArrayRef<NamedAttribute> attrs) {
2423	SmallVector<OpFoldResult> offsetValues = llvm::to_vector<`4`>(
2424	llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
2425	SmallVector<OpFoldResult> sizeValues = llvm::to_vector<`4`>(
2426	llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
2427	SmallVector<OpFoldResult> strideValues = llvm::to_vector<`4`>(
2428	llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
2429	build(b, result, resultType, source, offsetValues, sizeValues, strideValues);
2430	}
2431
2432	/// Build an ExtractSliceOp with dynamic entries and inferred result type.
2433	void ExtractSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2434	ValueRange offsets, ValueRange sizes,
2435	ValueRange strides, ArrayRef<NamedAttribute> attrs) {
2436	build(b, result, RankedTensorType(), source, offsets, sizes, strides, attrs);
2437	}
2438
2439	static LogicalResult produceSliceErrorMsg(SliceVerificationResult result,
2440	Operation *op,
2441	RankedTensorType expectedType) {
2442	switch (result) {
2443	case SliceVerificationResult::Success:
2444	return success();
2445	case SliceVerificationResult::RankTooLarge:
2446	return op->emitError(message: "expected rank to be smaller or equal to ")
2447	<< "the other rank. ";
2448	case SliceVerificationResult::SizeMismatch:
2449	return op->emitError(message: "expected type to be ")
2450	<< expectedType << " or a rank-reduced version. (size mismatch) ";
2451	case SliceVerificationResult::ElemTypeMismatch:
2452	return op->emitError(message: "expected element type to be ")
2453	<< expectedType.getElementType();
2454	default:
2455	llvm_unreachable("unexpected extract_slice op verification result");
2456	}
2457	}
2458
2459	/// Verifier for ExtractSliceOp.
2460	LogicalResult ExtractSliceOp::verify() {
2461	RankedTensorType sourceType = getSourceType();
2462
2463	// Verify result type against inferred type.
2464	RankedTensorType expectedType = ExtractSliceOp::inferResultType(
2465	sourceType, getMixedOffsets(), getMixedSizes(), getMixedStrides());
2466	SliceVerificationResult result = isRankReducedType(expectedType, getType());
2467	if (result != SliceVerificationResult::Success)
2468	return produceSliceErrorMsg(result, *this, expectedType);
2469
2470	// Verify that offsets, sizes, strides do not run out-of-bounds with respect
2471	// to the source tensor.
2472	SliceBoundsVerificationResult boundsResult = verifyInBoundsSlice(
2473	sourceType.getShape(), getStaticOffsets(), getStaticSizes(),
2474	getStaticStrides(), /generateErrorMessage=/true);
2475	if (!boundsResult.isValid)
2476	return getOperation()->emitError(boundsResult.errorMessage);
2477
2478	return success();
2479	}
2480
2481	llvm::SmallBitVector ExtractSliceOp::getDroppedDims() {
2482	return ::getDroppedDims(getType().getShape(), getMixedSizes());
2483	}
2484
2485	FailureOr<Value>
2486	ExtractSliceOp::rankReduceIfNeeded(OpBuilder &b, Location loc, Value value,
2487	ArrayRef<int64_t> desiredShape) {
2488	auto sourceTensorType = llvm::dyn_cast<RankedTensorType>(value.getType());
2489	assert(sourceTensorType && "not a ranked tensor type");
2490	auto sourceShape = sourceTensorType.getShape();
2491	if (sourceShape.equals(desiredShape))
2492	return value;
2493	auto maybeRankReductionMask =
2494	mlir::computeRankReductionMask(sourceShape, desiredShape);
2495	if (!maybeRankReductionMask)
2496	return failure();
2497	return createCanonicalRankReducingExtractSliceOp(
2498	b, loc, value,
2499	RankedTensorType::Builder(sourceTensorType).setShape(desiredShape));
2500	}
2501
2502	LogicalResult ExtractSliceOp::reifyResultShapes(
2503	OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
2504	reifiedReturnShapes.resize(`1`);
2505	reifiedReturnShapes[`0`].reserve(getType().getRank());
2506	SmallVector<OpFoldResult> mixedSizes = getMixedSizes();
2507	llvm::SmallBitVector droppedDims = getDroppedDims();
2508	for (const auto &size : enumerate(mixedSizes)) {
2509	if (droppedDims.test(size.index()))
2510	continue;
2511	reifiedReturnShapes[`0`].push_back(size.value());
2512	}
2513	return success();
2514	}
2515
2516	namespace {
2517	/// Pattern to rewrite an extract_slice op with tensor::Cast arguments.
2518	/// This essentially pushes memref_cast past its consuming slice when
2519	/// `canFoldIntoConsumerOp` is true.
2520	///
2521	/// Example:
2522	/// ```
2523	/// %0 = tensor.cast %V : tensor<16x16xf32> to tensor<?x?xf32>
2524	/// %1 = tensor.extract_slice %0[0, 0][3, 4][1, 1] : tensor<?x?xf32> to
2525	/// tensor<3x4xf32>
2526	/// ```
2527	/// is rewritten into:
2528	/// ```
2529	/// %0 = tensor.extract_slice %V[0, 0][3, 4][1, 1] : tensor<16x16xf32> to
2530	/// tensor<3x4xf32> %1 = tensor.cast %0: tensor<3x4xf32> to tensor<3x4xf32>
2531	/// ```
2532	class ExtractSliceOpCastFolder final : public OpRewritePattern<ExtractSliceOp> {
2533	public:
2534	using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
2535
2536	LogicalResult matchAndRewrite(ExtractSliceOp sliceOp,
2537	PatternRewriter &rewriter) const override {
2538	// Any constant operand, just return to let the constant folder kick in.
2539	if (llvm::any_of(sliceOp.getOperands(), [](Value operand) {
2540	return matchPattern(value: operand, pattern: matchConstantIndex());
2541	}))
2542	return failure();
2543
2544	auto castOp = sliceOp.getSource().getDefiningOp<CastOp>();
2545	if (!castOp)
2546	return failure();
2547
2548	if (!canFoldIntoConsumerOp(castOp))
2549	return failure();
2550
2551	// Pattern does not apply if the produced op would not verify.
2552	SliceBoundsVerificationResult sliceResult = verifyInBoundsSlice(
2553	cast<RankedTensorType>(castOp.getSource().getType()).getShape(),
2554	sliceOp.getStaticOffsets(), sliceOp.getStaticSizes(),
2555	sliceOp.getStaticStrides());
2556	if (!sliceResult.isValid)
2557	return failure();
2558
2559	// Create folded extract.
2560	Location loc = sliceOp.getLoc();
2561	Value newResult = rewriter.create<ExtractSliceOp>(
2562	loc, sliceOp.getType(), castOp.getSource(), sliceOp.getOffsets(),
2563	sliceOp.getSizes(), sliceOp.getStrides(), sliceOp.getStaticOffsets(),
2564	sliceOp.getStaticSizes(), sliceOp.getStaticStrides());
2565	rewriter.replaceOp(sliceOp, newResult);
2566	return success();
2567	}
2568	};
2569
2570	/// Slice elements from `values` into `outValues`. `counts` represents the
2571	/// numbers of elements to stride in the original values for each dimension.
2572	/// The output values can be used to construct a DenseElementsAttr.
2573	template <typename IterTy, typename ElemTy>
2574	static void sliceElements(IterTy values, ArrayRef<int64_t> counts,
2575	ArrayRef<int64_t> offsets, ArrayRef<int64_t> sizes,
2576	ArrayRef<int64_t> strides,
2577	llvm::SmallVectorImpl<ElemTy> *outValues) {
2578	assert(offsets.size() == sizes.size());
2579	assert(offsets.size() == strides.size());
2580	if (offsets.empty())
2581	return;
2582
2583	int64_t offset = offsets.front();
2584	int64_t size = sizes.front();
2585	int64_t stride = strides.front();
2586	if (offsets.size() == `1`) {
2587	for (int64_t i = `0`; i < size; ++i, offset += stride)
2588	outValues->push_back(*(values + offset));
2589
2590	return;
2591	}
2592
2593	for (int64_t i = `0`; i < size; ++i, offset += stride) {
2594	auto begin = values + offset * counts.front();
2595	sliceElements<IterTy, ElemTy>(begin, counts.drop_front(),
2596	offsets.drop_front(), sizes.drop_front(),
2597	strides.drop_front(), outValues);
2598	}
2599	}
2600
2601	/// Fold arith.constant and tensor.extract_slice into arith.constant. The
2602	/// folded operation might introduce more constant data; Users can control
2603	/// their heuristics by the control function.
2604	class ConstantOpExtractSliceFolder final
2605	: public OpRewritePattern<ExtractSliceOp> {
2606	public:
2607	using OpRewritePattern<ExtractSliceOp>::OpRewritePattern;
2608
2609	ConstantOpExtractSliceFolder(MLIRContext *context,
2610	ControlConstantExtractSliceFusionFn controlFn)
2611	: OpRewritePattern<ExtractSliceOp>(context),
2612	controlFn(std::move(controlFn)) {}
2613
2614	LogicalResult matchAndRewrite(ExtractSliceOp op,
2615	PatternRewriter &rewriter) const override {
2616	DenseElementsAttr attr;
2617	if (!matchPattern(op.getSource(), m_Constant(bind_value: &attr)))
2618	return failure();
2619
2620	// A constant splat is handled by fold().
2621	if (attr.isSplat())
2622	return failure();
2623
2624	// Dynamic result shape is not supported.
2625	auto sourceType = llvm::cast<ShapedType>(op.getSource().getType());
2626	auto resultType = llvm::cast<ShapedType>(op.getResult().getType());
2627	if (!sourceType.hasStaticShape() \|\| !resultType.hasStaticShape())
2628	return failure();
2629
2630	// Customized control over the folding.
2631	if (!controlFn(op))
2632	return failure();
2633
2634	int64_t count = sourceType.getNumElements();
2635	if (count == `0`)
2636	return failure();
2637
2638	// Check if there are any dynamic parts, which are not supported.
2639	auto offsets = op.getStaticOffsets();
2640	if (llvm::is_contained(offsets, ShapedType::kDynamic))
2641	return failure();
2642	auto sizes = op.getStaticSizes();
2643	if (llvm::is_contained(sizes, ShapedType::kDynamic))
2644	return failure();
2645	auto strides = op.getStaticStrides();
2646	if (llvm::is_contained(strides, ShapedType::kDynamic))
2647	return failure();
2648
2649	// Compute the stride for each dimension.
2650	SmallVector<int64_t> counts;
2651	ArrayRef<int64_t> shape = sourceType.getShape();
2652	counts.reserve(N: shape.size());
2653	for (int64_t v : shape) {
2654	count = count / v;
2655	counts.push_back(count);
2656	}
2657
2658	// New attribute constructed by the sliced values.
2659	DenseElementsAttr newAttr;
2660
2661	if (auto elems = llvm::dyn_cast<DenseIntElementsAttr>(attr)) {
2662	SmallVector<APInt> outValues;
2663	outValues.reserve(N: sourceType.getNumElements());
2664	sliceElements<DenseElementsAttr::IntElementIterator, APInt>(
2665	elems.begin(), counts, offsets, sizes, strides, &outValues);
2666	newAttr = DenseElementsAttr::get(resultType, outValues);
2667	} else if (auto elems = llvm::dyn_cast<DenseFPElementsAttr>(attr)) {
2668	SmallVector<APFloat> outValues;
2669	outValues.reserve(N: sourceType.getNumElements());
2670	sliceElements<DenseElementsAttr::FloatElementIterator, APFloat>(
2671	elems.begin(), counts, offsets, sizes, strides, &outValues);
2672	newAttr = DenseElementsAttr::get(resultType, outValues);
2673	}
2674
2675	if (newAttr) {
2676	rewriter.replaceOpWithNewOp<arith::ConstantOp>(op, resultType, newAttr);
2677	return success();
2678	}
2679
2680	return failure();
2681	}
2682
2683	private:
2684	/// This additionally controls whether the fold happens or not. Users can
2685	/// impose their heuristics in the function.
2686	ControlConstantExtractSliceFusionFn controlFn;
2687	};
2688
2689	} // namespace
2690
2691	void mlir::tensor::populateFoldConstantExtractSlicePatterns(
2692	RewritePatternSet &patterns,
2693	const ControlConstantExtractSliceFusionFn &controlFn) {
2694	patterns.add<ConstantOpExtractSliceFolder>(patterns.getContext(), controlFn);
2695	}
2696
2697	/// Return the canonical type of the result of an extract_slice op.
2698	struct SliceReturnTypeCanonicalizer {
2699	RankedTensorType operator()(ExtractSliceOp op,
2700	ArrayRef<OpFoldResult> mixedOffsets,
2701	ArrayRef<OpFoldResult> mixedSizes,
2702	ArrayRef<OpFoldResult> mixedStrides) {
2703	return ExtractSliceOp::inferCanonicalRankReducedResultType(
2704	op.getType().getRank(), op.getSourceType(), mixedOffsets, mixedSizes,
2705	mixedStrides);
2706	}
2707	};
2708
2709	/// A canonicalizer wrapper to replace ExtractSliceOps.
2710	struct SliceCanonicalizer {
2711	void operator()(PatternRewriter &rewriter, ExtractSliceOp op,
2712	ExtractSliceOp newOp) {
2713	Value replacement = newOp.getResult();
2714	if (replacement.getType() != op.getType())
2715	replacement = rewriter.create<tensor::CastOp>(op.getLoc(), op.getType(),
2716	replacement);
2717	rewriter.replaceOp(op, replacement);
2718	}
2719	};
2720
2721	void ExtractSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
2722	MLIRContext *context) {
2723	results.add<
2724	OpWithOffsetSizesAndStridesConstantArgumentFolder<
2725	ExtractSliceOp, SliceReturnTypeCanonicalizer, SliceCanonicalizer>,
2726	ExtractSliceOpCastFolder>(context);
2727	}
2728
2729	//
2730	static LogicalResult
2731	foldIdentityOffsetSizeAndStrideOpInterface(OffsetSizeAndStrideOpInterface op,
2732	ShapedType shapedType) {
2733	OpBuilder b(op.getContext());
2734	for (OpFoldResult ofr : op.getMixedOffsets())
2735	if (getConstantIntValue(ofr) != static_cast<int64_t>(`0`))
2736	return failure();
2737	// Rank-reducing noops only need to inspect the leading dimensions:
2738	// llvm::zip is appropriate.
2739	auto shape = shapedType.getShape();
2740	for (auto it : llvm::zip(op.getMixedSizes(), shape))
2741	if (getConstantIntValue(std::get<`0`>(it)) != std::get<`1`>(it))
2742	return failure();
2743	for (OpFoldResult ofr : op.getMixedStrides())
2744	if (getConstantIntValue(ofr) != static_cast<int64_t>(`1`))
2745	return failure();
2746	return success();
2747	}
2748
2749	/// If we have an ExtractSliceOp consuming an InsertSliceOp with the same
2750	/// slice, we can return the InsertSliceOp's source directly.
2751	// TODO: This only checks the immediate producer; extend to go up the
2752	// insert/extract chain if the slices are disjoint.
2753	static Value foldExtractAfterInsertSlice(ExtractSliceOp extractOp) {
2754	auto insertOp = extractOp.getSource().getDefiningOp<InsertSliceOp>();
2755
2756	auto isSame = [](OpFoldResult a, OpFoldResult b) { return a == b; };
2757	if (insertOp && insertOp.getSource().getType() == extractOp.getType() &&
2758	insertOp.isSameAs(extractOp, isSame))
2759	return insertOp.getSource();
2760
2761	return {};
2762	}
2763
2764	OpFoldResult ExtractSliceOp::fold(FoldAdaptor adaptor) {
2765	if (OpFoldResult reshapedSource = reshapeConstantSource(
2766	llvm::dyn_cast_if_present<SplatElementsAttr>(adaptor.getSource()),
2767	getResult().getType()))
2768	return reshapedSource;
2769	if (getSourceType() == getType() &&
2770	succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
2771	return this->getSource();
2772	if (Value slice = foldExtractAfterInsertSlice(*this))
2773	return slice;
2774
2775	return OpFoldResult();
2776	}
2777
2778	Value mlir::tensor::createCanonicalRankReducingExtractSliceOp(
2779	OpBuilder &b, Location loc, Value tensor, RankedTensorType targetType) {
2780	auto rankedTensorType = llvm::cast<RankedTensorType>(tensor.getType());
2781	unsigned rank = rankedTensorType.getRank();
2782	SmallVector<OpFoldResult> offsets(rank, b.getIndexAttr(`0`));
2783	SmallVector<OpFoldResult> sizes = getMixedSizes(builder&: b, loc, value: tensor);
2784	SmallVector<OpFoldResult> strides(rank, b.getIndexAttr(`1`));
2785	return b.createOrFold<tensor::ExtractSliceOp>(loc, targetType, tensor,
2786	offsets, sizes, strides);
2787	}
2788
2789	//===----------------------------------------------------------------------===//
2790	// InsertSliceOp
2791	//===----------------------------------------------------------------------===//
2792
2793	void InsertSliceOp::getAsmResultNames(
2794	function_ref<void(Value, StringRef)> setNameFn) {
2795	setNameFn(getResult(), "inserted_slice");
2796	}
2797
2798	// Build a InsertSliceOp with mixed static and dynamic entries.
2799	void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2800	Value dest, ArrayRef<OpFoldResult> offsets,
2801	ArrayRef<OpFoldResult> sizes,
2802	ArrayRef<OpFoldResult> strides,
2803	ArrayRef<NamedAttribute> attrs) {
2804	SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
2805	SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
2806	dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
2807	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
2808	dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
2809	result.addAttributes(attrs);
2810	build(b, result, dest.getType(), source, dest, dynamicOffsets, dynamicSizes,
2811	dynamicStrides, b.getDenseI64ArrayAttr(staticOffsets),
2812	b.getDenseI64ArrayAttr(staticSizes),
2813	b.getDenseI64ArrayAttr(staticStrides));
2814	}
2815
2816	/// Build an InsertSliceOp with mixed static and dynamic entries packed into a
2817	/// Range vector.
2818	void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2819	Value dest, ArrayRef<Range> ranges,
2820	ArrayRef<NamedAttribute> attrs) {
2821	auto [offsets, sizes, strides] = getOffsetsSizesAndStrides(ranges);
2822	build(b, result, source, dest, offsets, sizes, strides, attrs);
2823	}
2824
2825	// Build a InsertSliceOp with dynamic entries.
2826	void InsertSliceOp::build(OpBuilder &b, OperationState &result, Value source,
2827	Value dest, ValueRange offsets, ValueRange sizes,
2828	ValueRange strides, ArrayRef<NamedAttribute> attrs) {
2829	SmallVector<OpFoldResult> offsetValues = llvm::to_vector<`4`>(
2830	llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
2831	SmallVector<OpFoldResult> sizeValues = llvm::to_vector<`4`>(
2832	llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
2833	SmallVector<OpFoldResult> strideValues = llvm::to_vector<`4`>(
2834	llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
2835	build(b, result, source, dest, offsetValues, sizeValues, strideValues);
2836	}
2837
2838	/// Rank-reducing type verification for both InsertSliceOp and
2839	/// ParallelInsertSliceOp.
2840	static SliceVerificationResult verifyInsertSliceOp(
2841	RankedTensorType srcType, RankedTensorType dstType,
2842	ArrayRef<int64_t> staticOffsets, ArrayRef<int64_t> staticSizes,
2843	ArrayRef<int64_t> staticStrides, RankedTensorType expectedType = nullptr*) {
2844	// insert_slice is the inverse of extract_slice, use the same type
2845	// inference.
2846	RankedTensorType expected = ExtractSliceOp::inferResultType(
2847	dstType, staticOffsets, staticSizes, staticStrides);
2848	if (expectedType)
2849	*expectedType = expected;
2850	return isRankReducedType(expected, srcType);
2851	}
2852
2853	/// Verifier for InsertSliceOp.
2854	LogicalResult InsertSliceOp::verify() {
2855	// Verify result type against inferred type.
2856	RankedTensorType expectedType;
2857	SliceVerificationResult result =
2858	verifyInsertSliceOp(getSourceType(), getType(), getStaticOffsets(),
2859	getStaticSizes(), getStaticStrides(), &expectedType);
2860	if (result != SliceVerificationResult::Success)
2861	return produceSliceErrorMsg(result, *this, expectedType);
2862
2863	// Verify that offsets, sizes, strides do not run out-of-bounds with respect
2864	// to the destination tensor.
2865	SliceBoundsVerificationResult boundsResult = verifyInBoundsSlice(
2866	getDestType().getShape(), getStaticOffsets(), getStaticSizes(),
2867	getStaticStrides(), /generateErrorMessage=/true);
2868	if (!boundsResult.isValid)
2869	return getOperation()->emitError(boundsResult.errorMessage);
2870
2871	return success();
2872	}
2873
2874	/// If we have two consecutive InsertSliceOp writing to the same slice, we
2875	/// can mutate the second InsertSliceOp's destination to the first one's.
2876	///
2877	/// Example:
2878	///
2879	/// ```mlir
2880	/// %0 = tensor.insert_slice %slice0 into %input[0, 0] [64, 64] [1, 1]
2881	/// %1 = tensor.insert_slice %slice1 into %0[0, 0] [64, 64] [1, 1]
2882	/// ```
2883	///
2884	/// folds into:
2885	///
2886	/// ```mlir
2887	/// %1 = tensor.insert_slice %slice1 into %input[0, 0] [64, 64] [1, 1]
2888	/// ```
2889	///
2890	/// This pattern works with both InsertSliceOp and ParallelInsertSliceOp.
2891	static LogicalResult foldInsertAfterInsertSlice(InsertSliceOp insertOp) {
2892	auto prevInsertOp = insertOp.getDest().getDefiningOp<InsertSliceOp>();
2893
2894	auto isSame = [](OpFoldResult a, OpFoldResult b) { return a == b; };
2895	if (!prevInsertOp \|\|
2896	prevInsertOp.getSource().getType() != insertOp.getSource().getType() \|\|
2897	!prevInsertOp.isSameAs(insertOp, isSame))
2898	return failure();
2899
2900	insertOp.getDestMutable().assign(prevInsertOp.getDest());
2901	return success();
2902	}
2903
2904	/// Folds round-trip extract/insert slice op pairs.
2905	/// Example:
2906	/// ```mlir
2907	/// %0 = tensor.extract_slice %val[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1]
2908	/// %1 = tensor.insert_slice %0 into %val[0, 0, 0, 0] [1, 1, 2, 4] [1, 1, 1, 1]
2909	/// ```
2910	/// can be folded into %val.
2911	static Value foldInsertAfterExtractSlice(InsertSliceOp insertOp) {
2912	auto extractOp = insertOp.getSource().getDefiningOp<ExtractSliceOp>();
2913
2914	auto isSame = [](OpFoldResult a, OpFoldResult b) { return a == b; };
2915	if (!extractOp \|\| extractOp.getSource() != insertOp.getDest() \|\|
2916	!extractOp.isSameAs(insertOp, isSame))
2917	return nullptr;
2918
2919	return extractOp.getSource();
2920	}
2921
2922	OpFoldResult InsertSliceOp::fold(FoldAdaptor) {
2923	if (getSourceType().hasStaticShape() && getType().hasStaticShape() &&
2924	getSourceType() == getType() &&
2925	succeeded(foldIdentityOffsetSizeAndStrideOpInterface(*this, getType())))
2926	return this->getSource();
2927	if (succeeded(foldInsertAfterInsertSlice(*this)))
2928	return getResult();
2929	if (auto result = foldInsertAfterExtractSlice(*this))
2930	return result;
2931	if (llvm::any_of(getMixedSizes(), isZeroInteger))
2932	return getDest();
2933	return OpFoldResult();
2934	}
2935
2936	LogicalResult InsertSliceOp::reifyResultShapes(
2937	OpBuilder &builder, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
2938	reifiedReturnShapes.resize(`1`, SmallVector<OpFoldResult>(getType().getRank()));
2939	reifiedReturnShapes[`0`] = tensor::getMixedSizes(builder, getLoc(), getDest());
2940	return success();
2941	}
2942
2943	namespace {
2944	/// Pattern to rewrite a insert_slice op with constant arguments.
2945	///
2946	/// This pattern works with both InsertSliceOp and ParallelInsertSliceOp.
2947	template <typename InsertOpTy>
2948	class InsertSliceOpConstantArgumentFolder final
2949	: public OpRewritePattern<InsertOpTy> {
2950	public:
2951	using OpRewritePattern<InsertOpTy>::OpRewritePattern;
2952
2953	LogicalResult matchAndRewrite(InsertOpTy insertSliceOp,
2954	PatternRewriter &rewriter) const override {
2955	SmallVector<OpFoldResult> mixedOffsets(insertSliceOp.getMixedOffsets());
2956	SmallVector<OpFoldResult> mixedSizes(insertSliceOp.getMixedSizes());
2957	SmallVector<OpFoldResult> mixedStrides(insertSliceOp.getMixedStrides());
2958
2959	// No constant operands were folded, just return;
2960	if (failed(Result: foldDynamicOffsetSizeList(offsetsOrSizes&: mixedOffsets)) &&
2961	failed(Result: foldDynamicOffsetSizeList(offsetsOrSizes&: mixedSizes)) &&
2962	failed(Result: foldDynamicStrideList(strides&: mixedStrides)))
2963	return failure();
2964
2965	// Pattern does not apply if the produced op would not verify.
2966	SliceBoundsVerificationResult sliceResult =
2967	verifyInBoundsSlice(insertSliceOp.getDest().getType().getShape(),
2968	mixedOffsets, mixedSizes, mixedStrides);
2969	if (!sliceResult.isValid)
2970	return failure();
2971
2972	// Create the new op in canonical form.
2973	auto sourceType = ExtractSliceOp::inferCanonicalRankReducedResultType(
2974	insertSliceOp.getSourceType().getRank(), insertSliceOp.getDestType(),
2975	mixedOffsets, mixedSizes, mixedStrides);
2976	Value toInsert = insertSliceOp.getSource();
2977	if (sourceType != insertSliceOp.getSourceType()) {
2978	OpBuilder::InsertionGuard g(rewriter);
2979	// The only difference between InsertSliceOp and ParallelInsertSliceOp
2980	// is that the insertion point is just before the ParallelCombiningOp in
2981	// the parallel case.
2982	if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
2983	rewriter.setInsertionPoint(insertSliceOp->getParentOp());
2984	toInsert = rewriter.create<tensor::CastOp>(insertSliceOp.getLoc(),
2985	sourceType, toInsert);
2986	}
2987	rewriter.replaceOpWithNewOp<InsertOpTy>(
2988	insertSliceOp, toInsert, insertSliceOp.getDest(), mixedOffsets,
2989	mixedSizes, mixedStrides);
2990	return success();
2991	}
2992	};
2993
2994	/// Fold tensor_casts with insert_slice operations. If the source or
2995	/// destination tensor is a tensor_cast that removes static type information,
2996	/// the cast is folded into the insert_slice operation. E.g.:
2997	///
2998	/// ```mlir
2999	/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
3000	/// %2 = tensor.insert_slice %1 into ... : tensor<?x?xf32> into ...
3001	/// ```
3002	///
3003	/// folds into:
3004	///
3005	/// ```mlir
3006	/// %2 = tensor.insert_slice %0 into ... : tensor<8x16xf32> into ...
3007	/// ```
3008	///
3009	/// Note: When folding a cast on the destination tensor, the result of the
3010	/// insert_slice operation is casted to ensure that the type of the result did
3011	/// not change.
3012	///
3013	/// This pattern works with both InsertSliceOp and ParallelInsertSliceOp.
3014	template <typename InsertOpTy>
3015	struct InsertSliceOpCastFolder final : public OpRewritePattern<InsertOpTy> {
3016	using OpRewritePattern<InsertOpTy>::OpRewritePattern;
3017
3018	LogicalResult matchAndRewrite(InsertOpTy insertSliceOp,
3019	PatternRewriter &rewriter) const override {
3020	if (llvm::any_of(insertSliceOp.getOperands(), [](Value operand) {
3021	return matchPattern(value: operand, pattern: matchConstantIndex());
3022	}))
3023	return failure();
3024
3025	auto getSourceOfCastOp = [](Value v) -> std::optional<Value> {
3026	auto castOp = v.getDefiningOp<tensor::CastOp>();
3027	if (!castOp \|\| !canFoldIntoConsumerOp(castOp))
3028	return std::nullopt;
3029	return castOp.getSource();
3030	};
3031	std::optional<Value> sourceCastSource =
3032	getSourceOfCastOp(insertSliceOp.getSource());
3033	std::optional<Value> destCastSource =
3034	getSourceOfCastOp(insertSliceOp.getDest());
3035	if (!sourceCastSource && !destCastSource)
3036	return failure();
3037
3038	auto src =
3039	(sourceCastSource ? *sourceCastSource : insertSliceOp.getSource());
3040	auto dst = (destCastSource ? *destCastSource : insertSliceOp.getDest());
3041	auto srcType = llvm::dyn_cast<RankedTensorType>(src.getType());
3042	auto dstType = llvm::dyn_cast<RankedTensorType>(dst.getType());
3043	if (!srcType \|\| !dstType)
3044	return failure();
3045
3046	// The tensor.cast source could have additional static information not seen
3047	// in the insert slice op static sizes, so we ignore dynamic dims when
3048	// computing the rank reduction mask.
3049	SmallVector<int64_t> staticSizes(insertSliceOp.getStaticSizes());
3050	auto rankReductionMask = computeRankReductionMask(
3051	staticSizes, srcType.getShape(), /matchDynamic=/true);
3052	if (!rankReductionMask.has_value())
3053	return failure();
3054	// Replace dimensions in the insert slice op with corresponding static dims
3055	// from the cast source type. If the insert slice sizes have static dims
3056	// that are not static in the tensor.cast source (i.e., when the cast op
3057	// casts a dynamic dim to static), the dim should not be replaced, and the
3058	// pattern will fail later in `verifyInsertSliceOp`.
3059	SmallVector<OpFoldResult> mixedSizes(insertSliceOp.getMixedSizes());
3060	int64_t rankReducedIdx = `0`;
3061	for (auto [idx, size] : enumerate(First&: staticSizes)) {
3062	if (!rankReductionMask.value().contains(idx) &&
3063	!srcType.isDynamicDim(rankReducedIdx)) {
3064	mixedSizes [idx] = getAsIndexOpFoldResult(
3065	rewriter.getContext(), srcType.getDimSize(rankReducedIdx));
3066	size = srcType.getDimSize(rankReducedIdx++);
3067	}
3068	}
3069
3070	// Pattern does not apply if the produced op would not verify.
3071	if (verifyInsertSliceOp(srcType, dstType, insertSliceOp.getStaticOffsets(),
3072	staticSizes, insertSliceOp.getStaticStrides()) !=
3073	SliceVerificationResult::Success)
3074	return failure();
3075	SliceBoundsVerificationResult sliceResult =
3076	verifyInBoundsSlice(dstType.getShape(), insertSliceOp.getMixedOffsets(),
3077	mixedSizes, insertSliceOp.getMixedStrides());
3078	if (!sliceResult.isValid)
3079	return failure();
3080
3081	Operation *replacement = rewriter.create<InsertOpTy>(
3082	insertSliceOp.getLoc(), src, dst, insertSliceOp.getMixedOffsets(),
3083	mixedSizes, insertSliceOp.getMixedStrides());
3084
3085	// In the parallel case there is no result and so nothing to cast.
3086	bool isParallelInsert =
3087	std::is_same<InsertOpTy, ParallelInsertSliceOp>::value;
3088	if (!isParallelInsert && dst.getType() != insertSliceOp.getDestType()) {
3089	replacement = rewriter.create<tensor::CastOp>(insertSliceOp.getLoc(),
3090	insertSliceOp.getDestType(),
3091	replacement->getResult(`0`));
3092	}
3093	rewriter.replaceOp(insertSliceOp, replacement->getResults());
3094	return success();
3095	}
3096	};
3097
3098	/// If additional static type information can be deduced from a insert_slice's
3099	/// size operands, insert an explicit cast of the op's source operand. This
3100	/// enables other canonicalization patterns that are matching for tensor_cast
3101	/// ops such as `ForOpTensorCastFolder` in SCF.
3102	///
3103	/// Example:
3104	///
3105	/// ```mlir
3106	/// %r = tensor.insert_slice %0 into %1[...] [64, 64] [1, 1]
3107	/// : tensor<?x?xf32> into ...
3108	/// ```
3109	///
3110	/// folds into:
3111	///
3112	/// ```mlir
3113	/// %tmp = tensor.cast %0 : tensor<?x?xf32> to tensor<64x64xf32>
3114	/// %r = tensor.insert_slice %tmp into %1[...] [64, 64] [1, 1]
3115	/// : tensor<64x64xf32> into ...
3116	/// ```
3117	///
3118	/// This patterns works with both InsertSliceOp and ParallelInsertSliceOp.
3119	template <typename InsertOpTy>
3120	struct InsertSliceOpSourceCastInserter final
3121	: public OpRewritePattern<InsertOpTy> {
3122	using OpRewritePattern<InsertOpTy>::OpRewritePattern;
3123
3124	LogicalResult matchAndRewrite(InsertOpTy insertSliceOp,
3125	PatternRewriter &rewriter) const override {
3126	RankedTensorType srcType = insertSliceOp.getSourceType();
3127	if (srcType.getRank() != insertSliceOp.getDestType().getRank())
3128	return failure();
3129	SmallVector<int64_t> newSrcShape(srcType.getShape());
3130	for (int64_t i = `0`; i < srcType.getRank(); ++i) {
3131	if (std::optional<int64_t> constInt =
3132	getConstantIntValue(insertSliceOp.getMixedSizes()[i])) {
3133	// Bail on invalid IR.
3134	if (*constInt < `0`)
3135	return failure();
3136	newSrcShape [i] = *constInt;
3137	}
3138	}
3139	if (!hasValidSizesOffsets(sizesOrOffsets: newSrcShape))
3140	return failure();
3141
3142	RankedTensorType newSrcType = RankedTensorType::get(
3143	newSrcShape, srcType.getElementType(), srcType.getEncoding());
3144	if (srcType == newSrcType \|\|
3145	!preservesStaticInformation(srcType, newSrcType) \|\|
3146	!tensor::CastOp::areCastCompatible(srcType, newSrcType))
3147	return failure();
3148
3149	// newSrcType is:
3150	// 1) Different from srcType.
3151	// 2) "More static" than srcType.
3152	// 3) Cast-compatible with srcType.
3153	// Insert the cast.
3154	OpBuilder::InsertionGuard g(rewriter);
3155	// The only difference between InsertSliceOp and ParallelInsertSliceOp is
3156	// that the insertion point is just before the ParallelCombiningOp in the
3157	// parallel case.
3158	if (std::is_same<InsertOpTy, ParallelInsertSliceOp>::value)
3159	rewriter.setInsertionPoint(insertSliceOp->getParentOp());
3160	Value cast = rewriter.create<tensor::CastOp>(
3161	insertSliceOp.getLoc(), newSrcType, insertSliceOp.getSource());
3162	rewriter.replaceOpWithNewOp<InsertOpTy>(
3163	insertSliceOp, cast, insertSliceOp.getDest(),
3164	insertSliceOp.getMixedOffsets(), insertSliceOp.getMixedSizes(),
3165	insertSliceOp.getMixedStrides());
3166	return success();
3167	}
3168	};
3169	} // namespace
3170
3171	llvm::SmallBitVector InsertSliceOp::getDroppedDims() {
3172	return ::getDroppedDims(getSourceType().getShape(), getMixedSizes());
3173	}
3174
3175	void InsertSliceOp::getCanonicalizationPatterns(RewritePatternSet &results,
3176	MLIRContext *context) {
3177	results.add<InsertSliceOpConstantArgumentFolder<InsertSliceOp>,
3178	InsertSliceOpCastFolder<InsertSliceOp>,
3179	InsertSliceOpSourceCastInserter<InsertSliceOp>>(context);
3180	}
3181
3182	Value mlir::tensor::createCanonicalRankReducingInsertSliceOp(OpBuilder &b,
3183	Location loc,
3184	Value tensor,
3185	Value dest) {
3186	auto rankedTensorType = llvm::cast<RankedTensorType>(dest.getType());
3187	unsigned rank = rankedTensorType.getRank();
3188	SmallVector<OpFoldResult> offsets(rank, b.getIndexAttr(`0`));
3189	SmallVector<OpFoldResult> sizes = getMixedSizes(builder&: b, loc, value: dest);
3190	SmallVector<OpFoldResult> strides(rank, b.getIndexAttr(`1`));
3191	return b.createOrFold<tensor::InsertSliceOp>(loc, tensor, dest, offsets,
3192	sizes, strides);
3193	}
3194
3195	//===----------------------------------------------------------------------===//
3196	// PadOp
3197	//===----------------------------------------------------------------------===//
3198
3199	void PadOp::getAsmResultNames(function_ref<void(Value, StringRef)> setNameFn) {
3200	setNameFn(getResult(), "padded");
3201	}
3202
3203	// TODO: Replace custom<InferType> directive with AllTypesMatch as soon as it
3204	// supports optional types.
3205	void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand,
3206	Type typeToInfer, Type typeToInferFrom) {}
3207
3208	ParseResult
3209	parseInferType(OpAsmParser &parser,
3210	std::optional<OpAsmParser::UnresolvedOperand> optOperand,
3211	Type &typeToInfer, Type typeToInferFrom) {
3212	if (optOperand)
3213	typeToInfer = typeToInferFrom;
3214	return success();
3215	}
3216
3217	LogicalResult PadOp::verify() {
3218	auto sourceType = llvm::cast<RankedTensorType>(getSource().getType());
3219	auto resultType = llvm::cast<RankedTensorType>(getResult().getType());
3220	auto expectedType =
3221	PadOp::inferResultType(sourceType, getStaticLow(), getStaticHigh());
3222	if (!expectedType) {
3223	return emitError("failed to infer expectedType from sourceType ")
3224	<< sourceType << ", specified resultType is " << resultType;
3225	}
3226	if (resultType.getRank() != expectedType.getRank()) {
3227	return emitError("specified type ")
3228	<< resultType << " does not match the inferred type "
3229	<< expectedType;
3230	}
3231	for (int i = `0`, e = sourceType.getRank(); i < e; ++i) {
3232	if (resultType.getDimSize(i) == expectedType.getDimSize(i))
3233	continue;
3234	if (expectedType.isDynamicDim(i))
3235	continue;
3236	return emitError("specified type ")
3237	<< resultType << " does not match the inferred type "
3238	<< expectedType;
3239	}
3240
3241	return success();
3242	}
3243
3244	LogicalResult PadOp::verifyRegions() {
3245	auto &region = getRegion();
3246	unsigned rank = llvm::cast<RankedTensorType>(getResult().getType()).getRank();
3247	Block &block = region.front();
3248	if (block.getNumArguments() != rank)
3249	return emitError("expected the block to have ") << rank << " arguments";
3250
3251	// Note: the number and type of yield values are checked in the YieldOp.
3252	for (const auto &en : llvm::enumerate(block.getArgumentTypes())) {
3253	if (!en.value().isIndex())
3254	return emitOpError("expected block argument ")
3255	<< (en.index() + `1`) << " to be an index";
3256	}
3257
3258	// Ensure that the region yields an element of the right type.
3259	auto yieldOp = llvm::cast<YieldOp>(block.getTerminator());
3260	if (yieldOp.getValue().getType() !=
3261	llvm::cast<ShapedType>(getType()).getElementType())
3262	return emitOpError("expected yield type to match shape element type");
3263
3264	return success();
3265	}
3266
3267	RankedTensorType PadOp::inferResultType(RankedTensorType sourceType,
3268	ArrayRef<int64_t> staticLow,
3269	ArrayRef<int64_t> staticHigh,
3270	ArrayRef<int64_t> resultShape) {
3271	unsigned rank = sourceType.getRank();
3272	if (staticLow.size() != rank)
3273	return RankedTensorType();
3274	if (staticHigh.size() != rank)
3275	return RankedTensorType();
3276	if (!resultShape.empty() && resultShape.size() != rank)
3277	return RankedTensorType();
3278
3279	SmallVector<int64_t, `4`> inferredShape;
3280	for (auto i : llvm::seq<unsigned>(`0`, rank)) {
3281	if (sourceType.isDynamicDim(i) \|\| staticLow[i] == ShapedType::kDynamic \|\|
3282	staticHigh[i] == ShapedType::kDynamic) {
3283	inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamic
3284	: resultShape[i]);
3285	} else {
3286	int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i];
3287	assert((resultShape.empty() \|\| size == resultShape[i] \|\|
3288	resultShape[i] == ShapedType::kDynamic) &&
3289	"mismatch between inferred shape and result shape");
3290	inferredShape.push_back(size);
3291	}
3292	}
3293
3294	return RankedTensorType::get(inferredShape, sourceType.getElementType());
3295	}
3296
3297	void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
3298	Value source, ArrayRef<int64_t> staticLow,
3299	ArrayRef<int64_t> staticHigh, ValueRange low, ValueRange high,
3300	bool nofold, ArrayRef<NamedAttribute> attrs) {
3301	auto sourceType = llvm::cast<RankedTensorType>(source.getType());
3302	if (!resultType)
3303	resultType = inferResultType(sourceType, staticLow, staticHigh);
3304	result.addAttributes(attrs);
3305	build(b, result, resultType, source, low, high,
3306	b.getDenseI64ArrayAttr(staticLow), b.getDenseI64ArrayAttr(staticHigh),
3307	nofold ? b.getUnitAttr() : UnitAttr());
3308	}
3309
3310	void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
3311	Value source, ValueRange low, ValueRange high, bool nofold,
3312	ArrayRef<NamedAttribute> attrs) {
3313	auto sourceType = llvm::cast<RankedTensorType>(source.getType());
3314	unsigned rank = sourceType.getRank();
3315	SmallVector<int64_t, `4`> staticVector(rank, ShapedType::kDynamic);
3316	build(b, result, resultType, source, staticVector, staticVector, low, high,
3317	nofold, attrs);
3318	}
3319
3320	void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
3321	Value source, ArrayRef<OpFoldResult> low,
3322	ArrayRef<OpFoldResult> high, bool nofold,
3323	ArrayRef<NamedAttribute> attrs) {
3324	auto sourceType = llvm::cast<RankedTensorType>(source.getType());
3325	SmallVector<Value, `4`> dynamicLow, dynamicHigh;
3326	SmallVector<int64_t, `4`> staticLow, staticHigh;
3327	// staticLow and staticHigh have full information of the padding config.
3328	// This will grow staticLow and staticHigh with 1 value. If the config is
3329	// dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1
3330	// value as well.
3331	dispatchIndexOpFoldResults(low, dynamicLow, staticLow);
3332	dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh);
3333	if (!resultType) {
3334	resultType = PadOp::inferResultType(sourceType, staticLow, staticHigh);
3335	}
3336	assert(llvm::isa<RankedTensorType>(resultType));
3337	result.addAttributes(attrs);
3338	build(b, result, resultType, source, dynamicLow, dynamicHigh,
3339	b.getDenseI64ArrayAttr(staticLow), b.getDenseI64ArrayAttr(staticHigh),
3340	nofold ? b.getUnitAttr() : UnitAttr());
3341	}
3342
3343	void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
3344	Value source, ArrayRef<OpFoldResult> low,
3345	ArrayRef<OpFoldResult> high, Value constantPadValue,
3346	bool nofold, ArrayRef<NamedAttribute> attrs) {
3347	build(b, result, resultType, source, low, high, nofold, attrs);
3348
3349	// Add a region and a block to yield the pad value.
3350	Region *region = result.regions[`0`].get();
3351	int sourceRank = llvm::cast<RankedTensorType>(source.getType()).getRank();
3352	SmallVector<Type> blockArgTypes(sourceRank, b.getIndexType());
3353	SmallVector<Location> blockArgLocs(sourceRank, result.location);
3354
3355	// `builder.createBlock` changes the insertion point within the block. Create
3356	// a guard to reset the insertion point of the builder after it is destroyed.
3357	OpBuilder::InsertionGuard guard(b);
3358	b.createBlock(region, region->end(), blockArgTypes, blockArgLocs);
3359	b.create<tensor::YieldOp>(result.location, constantPadValue);
3360	}
3361
3362	llvm::SmallBitVector PadOp::getPaddedDims() {
3363	llvm::SmallBitVector paddedDims(getSourceType().getRank());
3364	auto extractPaddedDims = [&](ArrayRef<OpFoldResult> paddingWidths) {
3365	for (const auto &en : enumerate(paddingWidths))
3366	if (getConstantIntValue(en.value()) != static_cast<int64_t>(`0`))
3367	paddedDims.set(en.index());
3368	};
3369	extractPaddedDims(getMixedLowPad());
3370	extractPaddedDims(getMixedHighPad());
3371	return paddedDims;
3372	}
3373
3374	namespace {
3375	// Folds tensor.pad when padding is static zeros and the attribute
3376	// doesn't request otherwise.
3377	struct FoldStaticZeroPadding : public OpRewritePattern<PadOp> {
3378	using OpRewritePattern<PadOp>::OpRewritePattern;
3379
3380	LogicalResult matchAndRewrite(PadOp padTensorOp,
3381	PatternRewriter &rewriter) const override {
3382	if (!padTensorOp.hasZeroLowPad() \|\| !padTensorOp.hasZeroHighPad())
3383	return failure();
3384	if (padTensorOp.getNofold())
3385	return failure();
3386	rewriter.replaceOpWithNewOp<tensor::CastOp>(
3387	padTensorOp, padTensorOp.getResult().getType(),
3388	padTensorOp.getSource());
3389	return success();
3390	}
3391	};
3392
3393	// Fold CastOp into PadOp when adding static information.
3394	struct FoldSourceTensorCast : public OpRewritePattern<PadOp> {
3395	using OpRewritePattern<PadOp>::OpRewritePattern;
3396
3397	LogicalResult matchAndRewrite(PadOp padTensorOp,
3398	PatternRewriter &rewriter) const override {
3399	auto castOp = padTensorOp.getSource().getDefiningOp<tensor::CastOp>();
3400	if (!tensor::canFoldIntoConsumerOp(castOp))
3401	return failure();
3402
3403	auto newResultType = PadOp::inferResultType(
3404	llvm::cast<RankedTensorType>(castOp.getSource().getType()),
3405	padTensorOp.getStaticLow(), padTensorOp.getStaticHigh(),
3406	padTensorOp.getResultType().getShape());
3407
3408	if (newResultType == padTensorOp.getResultType()) {
3409	rewriter.modifyOpInPlace(padTensorOp, [&]() {
3410	padTensorOp.getSourceMutable().assign(castOp.getSource());
3411	});
3412	} else {
3413	auto newOp = rewriter.create<PadOp>(
3414	padTensorOp->getLoc(), newResultType, padTensorOp.getSource(),
3415	padTensorOp.getStaticLow(), padTensorOp.getStaticHigh(),
3416	padTensorOp.getLow(), padTensorOp.getHigh(), padTensorOp.getNofold(),
3417	getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames()));
3418	IRMapping mapper;
3419	padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
3420
3421	rewriter.replaceOpWithNewOp<tensor::CastOp>(
3422	padTensorOp, padTensorOp.getResultType(), newOp);
3423	}
3424	return success();
3425	}
3426	};
3427
3428	// Fold CastOp using the result of PadOp back into the latter if it adds
3429	// static information.
3430	struct FoldTargetTensorCast : public OpRewritePattern<PadOp> {
3431	using OpRewritePattern<PadOp>::OpRewritePattern;
3432
3433	LogicalResult matchAndRewrite(PadOp padTensorOp,
3434	PatternRewriter &rewriter) const override {
3435	if (!padTensorOp.getResult().hasOneUse())
3436	return failure();
3437	auto tensorCastOp =
3438	dyn_cast<tensor::CastOp>(*padTensorOp->getUsers().begin());
3439	if (!tensorCastOp)
3440	return failure();
3441	if (!tensor::preservesStaticInformation(source: padTensorOp.getResult().getType(),
3442	target: tensorCastOp.getDest().getType()))
3443	return failure();
3444
3445	auto replacementOp = rewriter.create<PadOp>(
3446	padTensorOp.getLoc(), tensorCastOp.getDest().getType(),
3447	padTensorOp.getSource(), padTensorOp.getStaticLow(),
3448	padTensorOp.getStaticHigh(), padTensorOp.getLow(),
3449	padTensorOp.getHigh(), padTensorOp.getNofold(),
3450	getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames()));
3451	replacementOp.getRegion().takeBody(padTensorOp.getRegion());
3452
3453	rewriter.replaceOp(padTensorOp, replacementOp.getResult());
3454	rewriter.replaceOp(tensorCastOp, replacementOp.getResult());
3455	return success();
3456	}
3457	};
3458
3459	/// Fold chains of tensor::ExtractSliceOp, tensor::PadOp pairs that pad
3460	/// different dimensions. The pattern applies if the following preconditions
3461	/// hold:
3462	/// 1) the tensor::ExtractSliceOps are not rank-reducing,
3463	/// 2) the tensor::ExtractSliceOps have only unit-strides,
3464	/// 3) the tensor::PadOps perform only high-padding,
3465	/// 4) the tensor::PadOps have the same constant padding value,
3466	/// 5) the tensor::PadOps do not have common padding dimensions,
3467	/// 6) one tensor::ExtractSliceOp, tensor::PadOp pair has zero-padding and
3468	/// zero-offset for every dimension.
3469	/// 7) the tensor::ExtractSliceOp sizes match the source tensor sizes for
3470	/// the
3471	/// padded source dimensions.
3472	///
3473	/// Example:
3474	///
3475	/// ```mlir
3476	/// %0 = tensor.extract_slice %input[16, 0] [%sz0, 64] [1, 1]
3477	/// : tensor<64x64xf32> to tensor<?x64xf32>
3478	/// %1 = tensor.pad %0 low[0, 0] high[%pw0, 0] { ...
3479	/// } : tensor<?x64xf32> to tensor<8x64xf32>
3480	/// %2 = tensor.extract_slice %1[0, 4] [8, %sz1] [1, 1]
3481	/// : tensor<8x64xf32> to tensor<8x?xf32>
3482	/// %res = tensor.pad %2 nofold low[0, 0] high[0, %pw1] { ...
3483	/// } : tensor<8x?xf32> to tensor<8x4xf32>
3484	/// ```
3485	///
3486	/// folds into:
3487	///
3488	/// ```mlir
3489	/// %0 = tensor.extract_slice %input[16, 4] [%sz0, %sz1] [1, 1]
3490	/// : tensor<64x64xf32> to tensor<?x?xf32>
3491	/// %res = tensor.pad %0 nofold low[0, 0] high[%pw0, %pw1] { ...
3492	/// } : tensor<?x?xf32> to tensor<8x4xf32>
3493	/// ```
3494	struct FoldOrthogonalPaddings : public OpRewritePattern<PadOp> {
3495	using OpRewritePattern<PadOp>::OpRewritePattern;
3496
3497	LogicalResult matchAndRewrite(PadOp padOp,
3498	PatternRewriter &rewriter) const override {
3499	auto innerSliceOp = padOp.getSource().getDefiningOp<ExtractSliceOp>();
3500	if (!innerSliceOp)
3501	return failure();
3502	auto outerPadOp = innerSliceOp.getSource().getDefiningOp<PadOp>();
3503	if (!outerPadOp \|\| outerPadOp.getNofold())
3504	return failure();
3505	auto outerSliceOp = outerPadOp.getSource().getDefiningOp<ExtractSliceOp>();
3506	if (!outerSliceOp)
3507	return failure();
3508
3509	// 1) Fail if the chain is rank-reducing.
3510	int64_t rank = padOp.getSourceType().getRank();
3511	if (outerSliceOp.getSourceType().getRank() != rank) {
3512	return rewriter.notifyMatchFailure(padOp,
3513	"cannot fold rank-reducing chain");
3514	}
3515
3516	// 2) Fail if the tensor::ExtractSliceOps have non-unit strides.
3517	if (!innerSliceOp.hasUnitStride() \|\| !outerSliceOp.hasUnitStride()) {
3518	return rewriter.notifyMatchFailure(
3519	padOp, "cannot fold non-unit stride ExtractSliceOps");
3520	}
3521
3522	// 3) Fail if the tensor::PadOps have non-zero low padding.
3523	if (!padOp.hasZeroLowPad() \|\| !outerPadOp.hasZeroLowPad()) {
3524	return rewriter.notifyMatchFailure(padOp,
3525	"cannot fold PadOps with low padding");
3526	}
3527
3528	// 4) Fail if the tensor::PadOps padding values do not match.
3529	Attribute innerAttr, outerAttr;
3530	Value innerValue = padOp.getConstantPaddingValue();
3531	Value outerValue = outerPadOp.getConstantPaddingValue();
3532	if (!innerValue \|\| !outerValue \|\|
3533	!matchPattern(value: innerValue, pattern: m_Constant(bind_value: &innerAttr)) \|\|
3534	!matchPattern(value: outerValue, pattern: m_Constant(bind_value: &outerAttr)) \|\|
3535	innerAttr != outerAttr) {
3536	return rewriter.notifyMatchFailure(
3537	padOp, "cannot fold PadOps with different padding values");
3538	}
3539
3540	// 5) Fail if a dimension is padded by both tensor::PadOps.
3541	llvm::SmallBitVector innerDims = padOp.getPaddedDims();
3542	llvm::SmallBitVector outerDims = outerPadOp.getPaddedDims();
3543	if (innerDims.anyCommon(RHS: outerDims)) {
3544	return rewriter.notifyMatchFailure(
3545	padOp, "cannot fold PadOps with common padding dimensions");
3546	}
3547
3548	// 6) Combine the offsets of the two tensor::ExtractSliceOps. Find the
3549	// zero-offset and zero-padding tensor::ExtractSliceOp, tensor::PadOp pair
3550	// for every dimension, and use the offset the other pair. Fail if no
3551	// zero-offset and zero-padding tensor::ExtractSliceOp, tensor::PadOp pair
3552	// exists.
3553	SmallVector<OpFoldResult> newOffsets(rank, rewriter.getIndexAttr(`0`));
3554	for (auto en : enumerate(newOffsets)) {
3555	OpFoldResult innerOffset = innerSliceOp.getMixedOffsets()[en.index()];
3556	OpFoldResult outerOffset = outerSliceOp.getMixedOffsets()[en.index()];
3557	if (!innerDims.test(en.index()) &&
3558	(getConstantIntValue(innerOffset) == static_cast<int64_t>(`0`))) {
3559	en.value() = outerOffset;
3560	continue;
3561	}
3562	if (!outerDims.test(en.index()) &&
3563	(getConstantIntValue(outerOffset) == static_cast<int64_t>(`0`))) {
3564	en.value() = innerOffset;
3565	continue;
3566	}
3567	return rewriter.notifyMatchFailure(
3568	padOp, "cannot find zero-offset and zero-padding pair");
3569	}
3570
3571	// 7) Combine the sizes of the two tensor::ExtractSliceOps. Take the size
3572	// of the outer tensor::ExtractSliceOp for the dimensions padded by the
3573	// outer tensor::PadOp and fail if the size of the inner
3574	// tensor::ExtractSliceOp does not match the size of the padded dimension.
3575	// Otherwise, take the size of the inner tensor::ExtractSliceOp.
3576	SmallVector<OpFoldResult> newSizes = innerSliceOp.getMixedSizes();
3577	for (auto en : enumerate(newSizes)) {
3578	if (!outerDims.test(en.index()))
3579	continue;
3580	OpFoldResult sliceSize = innerSliceOp.getMixedSizes()[en.index()];
3581	int64_t sourceSize = innerSliceOp.getSourceType().getShape()[en.index()];
3582	assert(!ShapedType::isDynamic(sourceSize) &&
3583	"expected padded dimension to have a static size");
3584	if (getConstantIntValue(sliceSize) != sourceSize) {
3585	return rewriter.notifyMatchFailure(
3586	padOp, "cannot fold since the inner ExtractSliceOp size does not "
3587	"match the size of the outer padding");
3588	}
3589	en.value() = outerSliceOp.getMixedSizes()[en.index()];
3590	}
3591
3592	// Combine the high paddings of the two tensor::PadOps.
3593	SmallVector<OpFoldResult> newHighPad(rank, rewriter.getIndexAttr(`0`));
3594	for (auto en : enumerate(newHighPad)) {
3595	if (innerDims.test(en.index()))
3596	newHighPad[en.index()] = padOp.getMixedHighPad()[en.index()];
3597	if (outerDims.test(en.index()))
3598	newHighPad[en.index()] = outerPadOp.getMixedHighPad()[en.index()];
3599	}
3600
3601	// Create a new tensor::ExtractSliceOp, tensor::PadOp pair that performs
3602	// the two paddings in one step.
3603	auto newSliceOp = rewriter.create<ExtractSliceOp>(
3604	padOp.getLoc(), outerSliceOp.getSource(), newOffsets, newSizes,
3605	innerSliceOp.getMixedStrides());
3606	auto newPadOp = rewriter.create<PadOp>(
3607	padOp.getLoc(), padOp.getResultType(), newSliceOp.getResult(),
3608	padOp.getMixedLowPad(), newHighPad, padOp.getNofold(),
3609	getPrunedAttributeList(padOp, PadOp::getAttributeNames()));
3610	rewriter.inlineRegionBefore(padOp.getRegion(), newPadOp.getRegion(),
3611	newPadOp.getRegion().begin());
3612	rewriter.replaceOp(padOp, newPadOp.getResult());
3613	return success();
3614	}
3615	};
3616
3617	struct FoldStaticPadding : public OpRewritePattern<PadOp> {
3618	using OpRewritePattern<PadOp>::OpRewritePattern;
3619
3620	LogicalResult matchAndRewrite(PadOp padTensorOp,
3621	PatternRewriter &rewriter) const override {
3622	Value input = padTensorOp.getSource();
3623	if (!llvm::isa<RankedTensorType>(Val: input.getType()))
3624	return failure();
3625	auto inputDims = llvm::cast<RankedTensorType>(input.getType()).getShape();
3626	auto inputRank = inputDims.size();
3627
3628	auto oldResultType =
3629	dyn_cast<RankedTensorType>(padTensorOp.getResult().getType());
3630	if (!oldResultType)
3631	return failure();
3632
3633	auto outputDims = oldResultType.getShape();
3634
3635	// Extract the static info from the high and low operands.
3636	SmallVector<int64_t> constOperandsLow;
3637	SmallVector<Value> newLows;
3638	for (auto operand : padTensorOp.getLow()) {
3639	APSInt intOp;
3640	if (!matchPattern(operand, m_ConstantInt(&intOp))) {
3641	constOperandsLow.push_back(ShapedType::kDynamic);
3642	newLows.push_back(operand);
3643	continue;
3644	}
3645	constOperandsLow.push_back(intOp.getExtValue());
3646	}
3647	SmallVector<int64_t> constOperandsHigh;
3648	SmallVector<Value> newHighs;
3649	for (auto operand : padTensorOp.getHigh()) {
3650	APSInt intOp;
3651	if (!matchPattern(operand, m_ConstantInt(&intOp))) {
3652	constOperandsHigh.push_back(ShapedType::kDynamic);
3653	newHighs.push_back(operand);
3654	continue;
3655	}
3656	constOperandsHigh.push_back(intOp.getExtValue());
3657	}
3658
3659	SmallVector<int64_t> constLow(padTensorOp.getStaticLow());
3660	SmallVector<int64_t> constHigh(padTensorOp.getStaticHigh());
3661
3662	// Verify the op is well-formed.
3663	if (inputDims.size() != outputDims.size() \|\|
3664	inputDims.size() != constLow.size() \|\|
3665	inputDims.size() != constHigh.size())
3666	return failure();
3667
3668	auto lowCount = `0`;
3669	auto highCount = `0`;
3670	for (size_t i = `0`; i < inputRank; i++) {
3671	if (constLow[i] == ShapedType::kDynamic)
3672	constLow [i] = constOperandsLow [lowCount++];
3673	if (constHigh[i] == ShapedType::kDynamic)
3674	constHigh [i] = constOperandsHigh [highCount++];
3675	}
3676
3677	auto staticLow = ArrayRef<int64_t>(constLow);
3678	auto staticHigh = ArrayRef<int64_t>(constHigh);
3679
3680	// Calculate the output sizes with the static information.
3681	SmallVector<int64_t> newOutDims;
3682	for (size_t i = `0`; i < inputRank; i++) {
3683	if (outputDims[i] == ShapedType::kDynamic) {
3684	newOutDims.push_back(
3685	(staticLow[i] == ShapedType::kDynamic \|\|
3686	staticHigh[i] == ShapedType::kDynamic \|\|
3687	inputDims[i] == ShapedType::kDynamic
3688	? ShapedType::kDynamic
3689	: inputDims[i] + staticLow[i] + staticHigh[i]));
3690	} else {
3691	newOutDims.push_back(Elt: outputDims[i]);
3692	}
3693	}
3694
3695	if (SmallVector<int64_t>(outputDims) == newOutDims \|\|
3696	llvm::all_of(Range&: newOutDims,
3697	P: [&](int64_t x) { return x == ShapedType::kDynamic; }))
3698	return failure();
3699
3700	// Rewrite the op using the new static type.
3701	auto newResultType = RankedTensorType::get(
3702	newOutDims, padTensorOp.getType().getElementType());
3703	auto newOp = rewriter.create<PadOp>(
3704	padTensorOp->getLoc(), newResultType, input, staticLow, staticHigh,
3705	newLows, newHighs, padTensorOp.getNofold(),
3706	getPrunedAttributeList(padTensorOp, PadOp::getAttributeNames()));
3707
3708	IRMapping mapper;
3709	padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
3710	rewriter.replaceOpWithNewOp<tensor::CastOp>(padTensorOp, oldResultType,
3711	newOp);
3712
3713	return success();
3714	}
3715	};
3716
3717	/// Folds a chain of `tensor.pad` ops with the same constant padding value.
3718	///
3719	/// Example:
3720	///
3721	/// ```mlir
3722	/// %1 = tensor.pad %0 low[0, 1] high[0, 2] {
3723	/// tensor.yield %val
3724	/// } : tensor<1x2xf32> to tensor<2x5xf32>
3725	/// %res = tensor.pad %1 low[0, 2] high[3, 0] {
3726	/// tensor.yield %val
3727	/// } : tensor<1x5xf32> to tensor<5x7xf32>
3728	/// ```
3729	///
3730	/// folds into:
3731	///
3732	/// ```mlir
3733	/// %res = tensor.pad %0 low[0, 3] high[3, 2] {
3734	/// tensor.yield %val
3735	/// } : tensor<1x2xf32> to tensor<5x7xf32>
3736	/// ```
3737	struct FoldConsecutiveConstantPadding : public OpRewritePattern<tensor::PadOp> {
3738	using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
3739
3740	LogicalResult matchAndRewrite(tensor::PadOp padOp,
3741	PatternRewriter &rewriter) const override {
3742	if (padOp.getNofold()) {
3743	return rewriter.notifyMatchFailure(padOp, "skipping unfoldable pad");
3744	}
3745
3746	auto producerPad = padOp.getSource().getDefiningOp<tensor::PadOp>();
3747	if (!producerPad \|\| producerPad.getNofold()) {
3748	return rewriter.notifyMatchFailure(
3749	padOp, "producer is not a foldable tensor.pad op");
3750	}
3751
3752	// Fail if the tensor::PadOps padding values do not match.
3753	Value consumerPadValue = padOp.getConstantPaddingValue();
3754	Value producerPadValue = producerPad.getConstantPaddingValue();
3755	if (!consumerPadValue \|\| !producerPadValue \|\|
3756	consumerPadValue != producerPadValue) {
3757	return rewriter.notifyMatchFailure(
3758	padOp,
3759	"cannot fold PadOps with different or non-constant padding values");
3760	}
3761
3762	Location loc = padOp.getLoc();
3763	AffineExpr d0, d1;
3764	bindDims(ctx: rewriter.getContext(), exprs&: d0, exprs&: d1);
3765
3766	// Combine the low/high paddings of the two tensor::PadOps.
3767	auto addPaddings = [&](ArrayRef<OpFoldResult> consumerPaddings,
3768	ArrayRef<OpFoldResult> producerPaddings) {
3769	SmallVector<OpFoldResult> sumPaddings;
3770	for (auto [consumerIndex, producerIndex] :
3771	llvm::zip_equal(t&: consumerPaddings, u&: producerPaddings)) {
3772	sumPaddings.push_back(Elt: affine::makeComposedFoldedAffineApply(
3773	b&: rewriter, loc, expr: d0 + d1, operands: {consumerIndex, producerIndex}));
3774	}
3775	return sumPaddings;
3776	};
3777
3778	SmallVector<OpFoldResult> newHighPad =
3779	addPaddings(padOp.getMixedHighPad(), producerPad.getMixedHighPad());
3780	SmallVector<OpFoldResult> newLowPad =
3781	addPaddings(padOp.getMixedLowPad(), producerPad.getMixedLowPad());
3782
3783	auto newPadOp = rewriter.create<tensor::PadOp>(
3784	padOp.getLoc(), padOp.getResultType(), producerPad.getSource(),
3785	newLowPad, newHighPad, padOp.getNofold(),
3786	getPrunedAttributeList(padOp, tensor::PadOp::getAttributeNames()));
3787	rewriter.inlineRegionBefore(padOp.getRegion(), newPadOp.getRegion(),
3788	newPadOp.getRegion().begin());
3789	rewriter.replaceOp(padOp, newPadOp.getResult());
3790	return success();
3791	}
3792	};
3793
3794	} // namespace
3795
3796	void PadOp::getCanonicalizationPatterns(RewritePatternSet &results,
3797	MLIRContext *context) {
3798	results.add<FoldStaticZeroPadding, FoldSourceTensorCast, FoldTargetTensorCast,
3799	FoldOrthogonalPaddings, FoldStaticPadding,
3800	FoldConsecutiveConstantPadding>(context);
3801	}
3802
3803	/// Return the padding value of the PadOp if it constant. In this context,
3804	/// "constant" means an actual constant or "defined outside of the block".
3805	///
3806	/// Values are considered constant in three cases:
3807	/// - A ConstantLike value.
3808	/// - A basic block argument from a different block.
3809	/// - A value defined outside of the block.
3810	///
3811	/// If the padding value is not constant, an empty Value is returned.
3812	Value PadOp::getConstantPaddingValue() {
3813	auto yieldOp = dyn_cast<YieldOp>(getRegion().front().getTerminator());
3814	if (!yieldOp)
3815	return {};
3816	Value padValue = yieldOp.getValue();
3817	// Check if yield value is a constant.
3818	if (matchPattern(padValue, m_Constant()))
3819	return padValue;
3820	// Check if yield value is defined inside the PadOp block.
3821	if (padValue.getParentBlock() == &getRegion().front())
3822	return {};
3823	// Else: Yield value defined outside of the PadOp block.
3824	return padValue;
3825	}
3826
3827	OpFoldResult PadOp::fold(FoldAdaptor) {
3828	if (getResultType().hasStaticShape() && getResultType() == getSourceType() &&
3829	!getNofold())
3830	return getSource();
3831	return {};
3832	}
3833
3834	//===----------------------------------------------------------------------===//
3835	// ParallelInsertSliceOp
3836	//===----------------------------------------------------------------------===//
3837
3838	OpResult ParallelInsertSliceOp::getTiedOpResult() {
3839	ParallelCombiningOpInterface parallelCombiningParent =
3840	getParallelCombiningParent();
3841	for (const auto &it :
3842	llvm::enumerate(parallelCombiningParent.getYieldingOps())) {
3843	Operation &nextOp = it.value();
3844	if (&nextOp == getOperation())
3845	return parallelCombiningParent.getParentResult(it.index());
3846	}
3847	llvm_unreachable("ParallelInsertSliceOp no tied OpResult found");
3848	}
3849
3850	// Build a ParallelInsertSliceOp with mixed static and dynamic entries.
3851	void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
3852	Value source, Value dest,
3853	ArrayRef<OpFoldResult> offsets,
3854	ArrayRef<OpFoldResult> sizes,
3855	ArrayRef<OpFoldResult> strides,
3856	ArrayRef<NamedAttribute> attrs) {
3857	SmallVector<int64_t> staticOffsets, staticSizes, staticStrides;
3858	SmallVector<Value> dynamicOffsets, dynamicSizes, dynamicStrides;
3859	dispatchIndexOpFoldResults(offsets, dynamicOffsets, staticOffsets);
3860	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticSizes);
3861	dispatchIndexOpFoldResults(strides, dynamicStrides, staticStrides);
3862	result.addAttributes(attrs);
3863	build(b, result, {}, source, dest, dynamicOffsets, dynamicSizes,
3864	dynamicStrides, b.getDenseI64ArrayAttr(staticOffsets),
3865	b.getDenseI64ArrayAttr(staticSizes),
3866	b.getDenseI64ArrayAttr(staticStrides));
3867	}
3868
3869	/// Build an ParallelInsertSliceOp with mixed static and dynamic entries
3870	/// packed into a Range vector.
3871	void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
3872	Value source, Value dest,
3873	ArrayRef<Range> ranges,
3874	ArrayRef<NamedAttribute> attrs) {
3875	auto [offsets, sizes, strides] = getOffsetsSizesAndStrides(ranges);
3876	build(b, result, source, dest, offsets, sizes, strides, attrs);
3877	}
3878
3879	// Build a ParallelInsertSliceOp with dynamic entries.
3880	void ParallelInsertSliceOp::build(OpBuilder &b, OperationState &result,
3881	Value source, Value dest, ValueRange offsets,
3882	ValueRange sizes, ValueRange strides,
3883	ArrayRef<NamedAttribute> attrs) {
3884	SmallVector<OpFoldResult> offsetValues = llvm::to_vector<`4`>(
3885	llvm::map_range(offsets, [](Value v) -> OpFoldResult { return v; }));
3886	SmallVector<OpFoldResult> sizeValues = llvm::to_vector<`4`>(
3887	llvm::map_range(sizes, [](Value v) -> OpFoldResult { return v; }));
3888	SmallVector<OpFoldResult> strideValues = llvm::to_vector<`4`>(
3889	llvm::map_range(strides, [](Value v) -> OpFoldResult { return v; }));
3890	build(b, result, source, dest, offsetValues, sizeValues, strideValues);
3891	}
3892
3893	LogicalResult ParallelInsertSliceOp::verify() {
3894	if (!isa<ParallelCombiningOpInterface>(getOperation()->getParentOp()))
3895	return this->emitError("expected ParallelCombiningOpInterface parent, got:")
3896	<< *(getOperation()->getParentOp());
3897
3898	// Verify result type against inferred type.
3899	RankedTensorType expectedType;
3900	SliceVerificationResult result =
3901	verifyInsertSliceOp(getSourceType(), getDestType(), getStaticOffsets(),
3902	getStaticSizes(), getStaticStrides(), &expectedType);
3903	if (result != SliceVerificationResult::Success)
3904	return produceSliceErrorMsg(result, *this, expectedType);
3905
3906	// Verify that offsets, sizes, strides do not run out-of-bounds with respect
3907	// to the destination tensor.
3908	SliceBoundsVerificationResult boundsResult = verifyInBoundsSlice(
3909	getDestType().getShape(), getStaticOffsets(), getStaticSizes(),
3910	getStaticStrides(), /generateErrorMessage=/true);
3911	if (!boundsResult.isValid)
3912	return getOperation()->emitError(boundsResult.errorMessage);
3913
3914	return success();
3915	}
3916
3917	void ParallelInsertSliceOp::getCanonicalizationPatterns(
3918	RewritePatternSet &results, MLIRContext *context) {
3919	results.add<InsertSliceOpConstantArgumentFolder<ParallelInsertSliceOp>,
3920	InsertSliceOpCastFolder<ParallelInsertSliceOp>,
3921	InsertSliceOpSourceCastInserter<ParallelInsertSliceOp>>(context);
3922	}
3923
3924	llvm::SmallBitVector ParallelInsertSliceOp::getDroppedDims() {
3925	return ::getDroppedDims(getSourceType().getShape(), getMixedSizes());
3926	}
3927
3928	//===----------------------------------------------------------------------===//
3929	// ScatterOp
3930	//===----------------------------------------------------------------------===//
3931
3932	void ScatterOp::getAsmResultNames(
3933	function_ref<void(Value, StringRef)> setNameFn) {
3934	setNameFn(getResult(), "scatter");
3935	}
3936
3937	LogicalResult ScatterOp::verify() {
3938	int64_t destRank = getDestType().getRank();
3939	ArrayRef<int64_t> scatterDims = getScatterDims();
3940	if (failed(verifyGatherOrScatterDims(getOperation(), scatterDims,
3941	getIndicesType().getShape(), destRank,
3942	"scatter", "dest")))
3943	return failure();
3944
3945	if (!getUnique())
3946	return emitOpError("requires 'unique' attribute to be set");
3947	// TODO: we could also check statically that there are fewer leading index
3948	// tensor dims than the dest dims. If this is not the case, the unique
3949	// attribute cannot be true.
3950
3951	// Use the GatherOp::inferResultType on the `dest` type and verify the
3952	// expected type matches the source type.
3953	RankedTensorType expectedSourceType = GatherOp::inferResultType(
3954	getDestType(), getIndicesType(), scatterDims, /rankReduced=/false);
3955	RankedTensorType expectedRankReducedSourceType = GatherOp::inferResultType(
3956	getDestType(), getIndicesType(), scatterDims, /rankReduced=/true);
3957	if (getSourceType() != expectedSourceType &&
3958	getSourceType() != expectedRankReducedSourceType) {
3959	return emitOpError("source type "
3960	"mismatch: "
3961	"expected ")
3962	<< expectedSourceType << " or its rank-reduced variant "
3963	<< expectedRankReducedSourceType << " (got: " << getSourceType()
3964	<< ")";
3965	}
3966
3967	return success();
3968	}
3969
3970	//===----------------------------------------------------------------------===//
3971	// SplatOp
3972	//===----------------------------------------------------------------------===//
3973
3974	void SplatOp::build(OpBuilder &builder, OperationState &result, Value element,
3975	Type aggregateType, ValueRange dynamicSizes) {
3976	build(builder, result, aggregateType, element, dynamicSizes);
3977	}
3978
3979	void SplatOp::build(OpBuilder &builder, OperationState &result, Value element,
3980	ArrayRef<int64_t> staticShape, ValueRange dynamicSizes) {
3981	auto aggregateType = RankedTensorType::get(staticShape, element.getType());
3982	build(builder, result, aggregateType, element, dynamicSizes);
3983	}
3984
3985	void SplatOp::build(OpBuilder &builder, OperationState &result, Value element,
3986	ArrayRef<OpFoldResult> sizes) {
3987	SmallVector<int64_t> staticShape;
3988	SmallVector<Value> dynamicSizes;
3989	dispatchIndexOpFoldResults(sizes, dynamicSizes, staticShape);
3990	build(builder, result, element, staticShape, dynamicSizes);
3991	}
3992
3993	void SplatOp::getAsmResultNames(
3994	function_ref<void(Value, StringRef)> setNameFn) {
3995	setNameFn(getResult(), "splat");
3996	}
3997
3998	LogicalResult SplatOp::verify() {
3999	if (getType().getNumDynamicDims() != getDynamicSizes().size())
4000	return emitOpError("incorrect number of dynamic sizes, has ")
4001	<< getDynamicSizes().size() << ", expected "
4002	<< getType().getNumDynamicDims();
4003	return success();
4004	}
4005
4006	LogicalResult
4007	SplatOp::reifyResultShapes(OpBuilder &builder,
4008	ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
4009	reifiedReturnShapes.resize(`1`, SmallVector<OpFoldResult>(getType().getRank()));
4010	unsigned ctr = `0`;
4011	for (int64_t i = `0`; i < getType().getRank(); ++i) {
4012	if (getType().isDynamicDim(i)) {
4013	reifiedReturnShapes[`0`][i] = getDynamicSizes()[ctr++];
4014	} else {
4015	reifiedReturnShapes[`0`][i] = builder.getIndexAttr(getType().getDimSize(i));
4016	}
4017	}
4018	return success();
4019	}
4020
4021	OpFoldResult SplatOp::fold(FoldAdaptor adaptor) {
4022	auto constOperand = adaptor.getInput();
4023	if (!isa_and_nonnull<IntegerAttr, FloatAttr>(constOperand))
4024	return {};
4025
4026	// Do not fold if the splat is not statically shaped
4027	if (!getType().hasStaticShape())
4028	return {};
4029
4030	// SplatElementsAttr::get treats single value for second arg as being a
4031	// splat.
4032	return SplatElementsAttr::get(getType(), {constOperand});
4033	}
4034
4035	//===----------------------------------------------------------------------===//
4036	// Common Canonicalizers and Folders.
4037	//===----------------------------------------------------------------------===//
4038	bool foldTensorCastPrecondition(DestinationStyleOpInterface op) {
4039	// 1. InsertSliceOp has its own logic about folding tensor.cast ops.
4040	// 2. Exclude DPS ops that are also LoopLike from this interface as they
4041	// might need special handling of attached regions.
4042	if (isa<InsertSliceOp>(op.getOperation()) \|\|
4043	isa<LoopLikeOpInterface>(op.getOperation()))
4044	return false;
4045
4046	return hasFoldableTensorCastOperand(op);
4047	}
4048
4049	/// Folds a tensor.cast op into a consuming DestinationStyleOpInterface op if
4050	/// the `tensor.cast` has source that is more static than the consuming op.
4051	///
4052	/// Example:
4053	/// ```mlir
4054	/// %1 = tensor.cast %0 : tensor<8x16xf32> to tensor<?x?xf32>
4055	/// %2 = consumer %1 ... : tensor<?x?xf32> ...
4056	/// ```
4057	///
4058	/// folds into:
4059	///
4060	/// ```mlir
4061	/// %2 = consumer %0 ... : tensor<8x16xf32> ...
4062	/// ```
4063	/// TODO: Move the pattern to a proper place, so all other DestinationStyleOp
4064	/// can add the pattern to their canonicalizers.
4065	struct FoldTensorCastProducerOp
4066	: public OpInterfaceRewritePattern<DestinationStyleOpInterface> {
4067	using OpInterfaceRewritePattern<
4068	DestinationStyleOpInterface>::OpInterfaceRewritePattern;
4069
4070	LogicalResult matchAndRewrite(DestinationStyleOpInterface op,
4071	PatternRewriter &rewriter) const override {
4072
4073	// Reject PackOp/UnpackOp (i.e. RelayoutOps) - there are dedicated patterns
4074	// for that instead.
4075	if (!foldTensorCastPrecondition(op) \|\|
4076	isa<linalg::RelayoutOpInterface>(*op))
4077	return failure();
4078
4079	SmallVector<Type> newResultTypes(op->getResultTypes());
4080	SmallVector<Value> newOperands =
4081	getUpdatedOperandsAfterCastOpFolding(op, newResultTypes);
4082
4083	// Clone op
4084	auto newOp = clone(rewriter, op, newResultTypes, newOperands);
4085
4086	SmallVector<Value, `4`> replacements;
4087	replacements.reserve(N: newOp->getNumResults());
4088	for (auto [oldResult, newResult] :
4089	llvm::zip(op->getResults(), newOp->getResults())) {
4090	if (newResult.getType() != oldResult.getType()) {
4091	replacements.push_back(rewriter.create<tensor::CastOp>(
4092	op->getLoc(), oldResult.getType(), newResult));
4093	} else {
4094	replacements.push_back(newResult);
4095	}
4096	}
4097	rewriter.replaceOp(op, replacements);
4098
4099	return success();
4100	}
4101	};
4102
4103	//===----------------------------------------------------------------------===//
4104	// TensorDialect
4105	//===----------------------------------------------------------------------===//
4106
4107	void TensorDialect::getCanonicalizationPatterns(
4108	RewritePatternSet &results) const {
4109	results.add<FoldTensorCastProducerOp>(getContext());
4110	}
4111
4112	//===----------------------------------------------------------------------===//
4113	// TableGen'd op method definitions
4114	//===----------------------------------------------------------------------===//
4115
4116	#define GET_OP_CLASSES
4117	#include "mlir/Dialect/Tensor/IR/TensorOps.cpp.inc"
4118

Provided by KDAB

Definitions

getMixedSize
getMixedSizes
getOrCreateDestination
getOrCreateDestinations
isSameTypeWithoutEncoding
getDroppedDims
foldDynamicToStaticDimSizes
ChainedTensorBitcast
matchAndRewrite
preservesStaticInformation
canFoldIntoConsumerOp
canFoldIntoProducerOp
hasFoldableTensorCastOperand
getUpdatedOperandsAfterCastOpFolding
foldTensorCast
joinShapes
ChainedTensorCast
matchAndRewrite
TensorCastExtractSlice
matchAndRewrite
SingleInputConcatOp
matchAndRewrite
InferConcatOperandTypes
matchAndRewrite
InferConcatResultType
matchAndRewrite
DimOfCastOp
matchAndRewrite
DimOfDestStyleOp
matchAndRewrite
DimOfReshapeOp
matchAndRewrite
ReplaceEmptyTensorStaticShapeDims
matchAndRewrite
FoldEmptyTensorWithDimOp
matchAndRewrite
FoldEmptyTensorWithCastOp
matchAndRewrite
ExtractFromTensorCast
matchAndRewrite
ExtractFromCollapseShape
matchAndRewrite
foldExtractAfterInsert
populateFoldCollapseExtractPatterns
ExtractElementFromIndexCast
matchAndRewrite
verifyGatherOrScatterDims
StaticTensorGenerate
matchAndRewrite
ExtractFromTensorGenerate
matchAndRewrite
getNumElements
verifyTensorReshapeOp
FoldReshapeWithConstant
matchAndRewrite
FoldReshapeWithSplat
matchAndRewrite
FoldReshapeWithFromElements
matchAndRewrite
FoldCollapseOfCastOp
matchAndRewrite
ConvertToStaticExpandShape
matchAndRewrite
produceSliceErrorMsg
ExtractSliceOpCastFolder
matchAndRewrite
sliceElements
ConstantOpExtractSliceFolder
ConstantOpExtractSliceFolder
matchAndRewrite
populateFoldConstantExtractSlicePatterns
SliceReturnTypeCanonicalizer
operator()
SliceCanonicalizer
operator()
foldIdentityOffsetSizeAndStrideOpInterface
foldExtractAfterInsertSlice
createCanonicalRankReducingExtractSliceOp
verifyInsertSliceOp
foldInsertAfterInsertSlice
foldInsertAfterExtractSlice
InsertSliceOpConstantArgumentFolder
matchAndRewrite
InsertSliceOpCastFolder
matchAndRewrite
InsertSliceOpSourceCastInserter
matchAndRewrite
createCanonicalRankReducingInsertSliceOp
printInferType
parseInferType
FoldStaticZeroPadding
matchAndRewrite
FoldSourceTensorCast
matchAndRewrite
FoldTargetTensorCast
matchAndRewrite
FoldOrthogonalPaddings
matchAndRewrite
FoldStaticPadding
matchAndRewrite
FoldConsecutiveConstantPadding
matchAndRewrite
foldTensorCastPrecondition
FoldTensorCastProducerOp

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of mlir/lib/Dialect/Tensor/IR/TensorOps.cpp