//===- ConvertToDestinationStyle.cpp - Convert non-DPS to DPS ops ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains patterns to convert non-DPS ops to DPS ops. New
// tensor.empty ops are inserted as a destination. Such tensor.empty ops can be
// eliminated with "empty tensor elimination", allowing them to bufferize
// without an allocation (assuming there are no further conflicts).
//
//===----------------------------------------------------------------------===//
//
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Arith/Utils/Utils.h"
#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/Matchers.h"
#include "mlir/IR/PatternMatch.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"

using namespace mlir;
using namespace mlir::tensor;

// Implements backtracking to traverse indices of the output buffer while
// iterating over op.elements().
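// For example (an illustrative sketch with made-up SSA names, not verbatim
// output): for a 2x2 tensor.from_elements with elements %e00, %e01, %e10,
// %e11, the recursion below emits a chain of inserts, innermost dimension
// first:
//   %0 = tensor.insert %e00 into %dest[%c0, %c0] : tensor<2x2xf32>
//   %1 = tensor.insert %e01 into %0[%c0, %c1] : tensor<2x2xf32>
//   %2 = tensor.insert %e10 into %1[%c1, %c0] : tensor<2x2xf32>
//   %3 = tensor.insert %e11 into %2[%c1, %c1] : tensor<2x2xf32>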
static Value createInserts(RewriterBase &rewriter, Location loc, int dim,
                           Value destination, ArrayRef<int64_t> shape,
                           ArrayRef<Value> constants,
                           OperandRange::iterator &elementIt,
                           SmallVectorImpl<Value> &indices) {
  if (dim == static_cast<int>(shape.size()) - 1) {
    for (int i = 0; i < shape.back(); ++i) {
      indices.back() = constants[i];
      destination = rewriter.create<tensor::InsertOp>(loc, *elementIt,
                                                      destination, indices);
      ++elementIt;
    }
    return destination;
  }
  for (int i = 0; i < shape[dim]; ++i) {
    indices[dim] = constants[i];
    destination = createInserts(rewriter, loc, dim + 1, destination, shape,
                                constants, elementIt, indices);
  }
  return destination;
}

/// Create a memcpy from the given source tensor to the given destination
/// memref. The copy op type can be specified in the `options`.
static void createMemcpy(OpBuilder &b, Location loc, Value tensorSource,
                         Value memrefDest,
                         const linalg::BufferizeToAllocationOptions &options) {
  auto tensorType = dyn_cast<RankedTensorType>(tensorSource.getType());
  assert(tensorType && "expected ranked tensor");
  assert(isa<MemRefType>(memrefDest.getType()) && "expected ranked memref");

  switch (options.memcpyOp) {
  case linalg::BufferizeToAllocationOptions::MemcpyOp::
      MaterializeInDestination: {
    // Note: This is the preferred way of memcpy'ing because no layout map
    // and/or memory space has to be specified for the source.
    auto materializeOp = b.create<bufferization::MaterializeInDestinationOp>(
        loc, tensorSource, memrefDest);
    materializeOp.setWritable(true);
  } break;
  case linalg::BufferizeToAllocationOptions::MemcpyOp::MemrefCopy: {
    // TODO: Support custom memory space on source.
    // We do not know the layout map of the source yet, so use a fully dynamic
    // layout for best compatibility.
    Value toBuffer = b.create<bufferization::ToBufferOp>(
        loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType),
        tensorSource, /*readOnly=*/true);
    b.create<memref::CopyOp>(loc, toBuffer, memrefDest);
  } break;
  case linalg::BufferizeToAllocationOptions::MemcpyOp::LinalgCopy: {
    // TODO: Support custom memory space on source.
    // We do not know the layout map of the source yet, so use a fully dynamic
    // layout for best compatibility.
    Value toBuffer = b.create<bufferization::ToBufferOp>(
        loc, bufferization::getMemRefTypeWithFullyDynamicLayout(tensorType),
        tensorSource, /*readOnly=*/true);
    b.create<linalg::CopyOp>(loc, toBuffer, memrefDest);
  } break;
  }
}

static Operation *movePaddingToFillOrGenericOp(RewriterBase &rewriter,
                                               Location loc, PadOp padOp,
                                               Value dest) {
  OpBuilder::InsertionGuard g(rewriter);
  RankedTensorType resultType = padOp.getResultType();

  // Examine the yielded value to decide if a linalg.generic is needed or a
  // linalg.fill is sufficient.
  Value yieldedValue =
      cast<tensor::YieldOp>(padOp.getBody()->getTerminator()).getValue();
  Attribute constYieldedValue;
  // Is the yielded value a bbArg defined outside of the PadOp?
  bool outsideBbArg =
      isa<BlockArgument>(yieldedValue) &&
      cast<BlockArgument>(yieldedValue).getOwner()->getParentOp() !=
          padOp.getOperation();
  // Is the yielded value an OpResult defined outside of the PadOp?
  bool outsideOpResult =
      isa<OpResult>(yieldedValue) &&
      yieldedValue.getDefiningOp()->getParentOp() != padOp.getOperation();
  bool invariantYieldedValue = outsideBbArg || outsideOpResult;
  if (matchPattern(yieldedValue, m_Constant(&constYieldedValue))) {
    // Padding with a constant: Create linalg.fill.
    Dialect *arithDialect =
        rewriter.getContext()->getLoadedDialect<arith::ArithDialect>();
    Value fillValue =
        arithDialect
            ->materializeConstant(rewriter, constYieldedValue,
                                  yieldedValue.getType(), yieldedValue.getLoc())
            ->getResult(0);
    auto fillOp = rewriter.create<linalg::FillOp>(loc, ValueRange(fillValue),
                                                  ValueRange(dest));
    return fillOp;
  }

  if (invariantYieldedValue) {
    // Padding with an invariant value.
    auto fillOp = rewriter.create<linalg::FillOp>(loc, ValueRange(yieldedValue),
                                                  ValueRange(dest));
    return fillOp;
  }

  // Create linalg.generic.
  SmallVector<utils::IteratorType> iteratorTypes(resultType.getRank(),
                                                 utils::IteratorType::parallel);
  SmallVector<AffineMap> indexingMaps(
      1, rewriter.getMultiDimIdentityMap(resultType.getRank()));
  auto genericOp = rewriter.create<linalg::GenericOp>(
      loc, resultType, /*inputs=*/ValueRange(),
      /*outputs=*/ValueRange{dest}, /*indexingMaps=*/
      indexingMaps, iteratorTypes);
  Block *body = rewriter.createBlock(&genericOp->getRegion(0), {},
                                     resultType.getElementType(), loc);
  rewriter.setInsertionPointToStart(body);
  SmallVector<Value> bbArgReplacements;
  for (int64_t i = 0; i < resultType.getRank(); ++i)
    bbArgReplacements.push_back(rewriter.create<linalg::IndexOp>(loc, i));
  rewriter.mergeBlocks(padOp.getBody(), body, bbArgReplacements);

  // Update terminator.
  auto yieldOp = cast<tensor::YieldOp>(body->getTerminator());
  rewriter.replaceOpWithNewOp<linalg::YieldOp>(yieldOp, yieldOp.getValue());
  return genericOp;
}

static SmallVector<Value> reifyOrComputeDynamicSizes(OpBuilder &b,
                                                     Value value) {
  auto tensorType = cast<RankedTensorType>(value.getType());
  if (tensorType.hasStaticShape())
    return {};

  // Try to reify dynamic sizes.
  ReifiedRankedShapedTypeDims reifiedShape;
  if (isa<OpResult>(value) &&
      succeeded(reifyResultShapes(b, value.getDefiningOp(), reifiedShape))) {
    SmallVector<Value> dynSizes;
    for (int64_t i = 0; i < tensorType.getRank(); ++i) {
      if (tensorType.isDynamicDim(i))
        dynSizes.push_back(cast<Value>(
            reifiedShape[cast<OpResult>(value).getResultNumber()][i]));
    }
    return dynSizes;
  }

  // Create tensor.dim ops.
  SmallVector<Value> dynSizes;
  for (int64_t i = 0; i < tensorType.getRank(); ++i) {
    if (tensorType.isDynamicDim(i))
      dynSizes.push_back(
          b.create<DimOp>(value.getLoc(), value,
                          b.create<arith::ConstantIndexOp>(value.getLoc(), i)));
  }
  return dynSizes;
}

static Value
createAllocationForTensor(RewriterBase &rewriter, Location loc, Value value,
                          const linalg::BufferizeToAllocationOptions &options,
                          Attribute memorySpace = {}) {
  OpBuilder::InsertionGuard g(rewriter);
  auto tensorType = cast<RankedTensorType>(value.getType());

  // Create buffer allocation.
  auto memrefType =
      cast<MemRefType>(bufferization::getMemRefTypeWithStaticIdentityLayout(
          tensorType, memorySpace));
  SmallVector<Value> dynamicSizes = reifyOrComputeDynamicSizes(rewriter, value);

  Value alloc;
  if (options.allocOp ==
      linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloc) {
    alloc = rewriter.create<memref::AllocOp>(loc, memrefType, dynamicSizes);
    if (options.emitDealloc) {
      // Place deallocation at the end of the block.
      rewriter.setInsertionPoint(rewriter.getInsertionBlock()->getTerminator());
      rewriter.create<memref::DeallocOp>(loc, alloc);
    }
  } else if (options.allocOp ==
             linalg::BufferizeToAllocationOptions::AllocOp::MemrefAlloca) {
    alloc = rewriter.create<memref::AllocaOp>(loc, memrefType, dynamicSizes);
    // No dealloc is needed.
  }

  return alloc;
}

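// Sketch of the rewrite performed by the tensor.pad overload below. This is
// illustrative only (shown for the memref.alloc + linalg.fill configuration,
// with made-up SSA names and pseudo placeholders in angle brackets):
//   %0 = tensor.pad %src low[%l] high[%h] { ... tensor.yield %cst ... }
// becomes roughly:
//   %alloc = memref.alloc(...)                    // buffer for the padded shape
//   linalg.fill ins(%cst ...) outs(%alloc ...)    // write the padding value
//   %view = memref.subview %alloc[%l] [<sizes of %src>] [1]
//   <memcpy of %src into %view, per options.memcpyOp>
//   %0 = bufferization.to_tensor %alloc restrict writable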
Value linalg::bufferizeToAllocation(
    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
    PadOp padOp, Attribute memorySpace, Operation *insertionPoint) {
  // tensor.pad does not have a destination operand.
  assert(!options.bufferizeDestinationOnly && "invalid options");

  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(insertionPoint ? insertionPoint : padOp);
  Location loc = padOp.getLoc();

  // Create buffer allocation.
  Value alloc = createAllocationForTensor(rewriter, loc, padOp.getResult(),
                                          options, memorySpace);
  rewriter.setInsertionPoint(padOp);

  if (!padOp.hasZeroLowPad() || !padOp.hasZeroHighPad()) {
    // Create linalg.fill or linalg.generic. Not needed if there is no padding.
    Operation *fillOp =
        movePaddingToFillOrGenericOp(rewriter, loc, padOp, alloc);
    rewriter.setInsertionPointAfter(fillOp);
  }

  // Create memcpy.
  SmallVector<OpFoldResult> sizes =
      getMixedSizes(rewriter, loc, padOp.getSource());
  SmallVector<OpFoldResult> strides(padOp.getResultType().getRank(),
                                    rewriter.getIndexAttr(1));
  Value subview = rewriter.create<memref::SubViewOp>(
      loc, alloc, /*offsets=*/padOp.getMixedLowPad(), sizes, strides);
  createMemcpy(rewriter, loc, padOp.getSource(), subview, options);

  // Create bufferization.to_tensor with "restrict" and "writable". The
  // returned tensor is a new buffer allocation, so it does not alias with any
  // buffer.
  Value toTensorOp = rewriter.create<bufferization::ToTensorOp>(
      loc, alloc, /*restrict=*/true, /*writable=*/true);
  rewriter.replaceOp(padOp, toTensorOp);
  return alloc;
}

Value linalg::bufferizeToAllocation(
    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
    vector::MaskOp maskOp, Attribute memorySpace, Operation *insertionPoint) {
  assert(llvm::range_size(maskOp.getMaskBlock()->without_terminator()) == 1 &&
         "expected single masked op");
  OpBuilder::InsertionGuard g(rewriter);

  // Should the bufferization options and state be function arguments?
  bufferization::BufferizationOptions bufferizationOptions;
  bufferization::BufferizationState bufferizationState;

  Operation *yieldOp = maskOp.getMaskRegion().front().getTerminator();
  assert(isa<vector::YieldOp>(yieldOp) && "expected yield op terminator");

  // Bufferize maskable op. By default, place the buffer allocation right
  // before the mask op.
  Value alloc = bufferizeToAllocation(
      rewriter, options, maskOp.getMaskableOp(), memorySpace,
      /*insertionPoint=*/insertionPoint ? insertionPoint : maskOp);

  if (options.bufferizeDestinationOnly)
    return alloc;

  // Bufferize terminator.
  rewriter.setInsertionPoint(yieldOp);
  if (failed(cast<bufferization::BufferizableOpInterface>(yieldOp).bufferize(
          rewriter, bufferizationOptions, bufferizationState)))
    return nullptr;

  // Erase dead to_tensor ops inside of the mask op. This is necessary because
  // there may only be one op (apart from the terminator) inside the mask op.
  // TODO: Remove dead to_tensor ops more aggressively during bufferization.
  SmallVector<Operation *> toTensorOps;
  maskOp.walk([&](bufferization::ToTensorOp toTensorOp) {
    if (toTensorOp->getUses().empty())
      toTensorOps.push_back(toTensorOp.getOperation());
  });
  for (Operation *op : toTensorOps)
    rewriter.eraseOp(op);

  // Bufferize mask op.
  SmallVector<OpOperand *> resultUses;
  for (Value result : maskOp.getResults())
    if (isa<TensorType>(result.getType()))
      for (OpOperand &use : result.getUses())
        resultUses.push_back(&use);
  rewriter.setInsertionPoint(maskOp);
  if (failed(
          cast<bufferization::BufferizableOpInterface>(maskOp.getOperation())
              .bufferize(rewriter, bufferizationOptions, bufferizationState)))
    return nullptr;

  // Set the "restrict" attribute, indicating that no other tensor aliases with
  // this tensor. That is because we just allocated a new buffer for the
  // tensor.
  for (OpOperand *resultUse : resultUses) {
    auto toTensorOp =
        resultUse->get().getDefiningOp<bufferization::ToTensorOp>();
    assert(toTensorOp && "expected to_tensor op");
    rewriter.modifyOpInPlace(toTensorOp, [&]() {
      toTensorOp.setRestrict(true);
      toTensorOp.setWritable(true);
    });
  }

  return alloc;
}

Value linalg::bufferizeToAllocation(
    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
    bufferization::AllocTensorOp allocTensorOp, Attribute memorySpace,
    Operation *insertionPoint) {
  Location loc = allocTensorOp.getLoc();
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(insertionPoint ? insertionPoint : allocTensorOp);
  bufferization::BufferizationOptions bufferizationOptions;

  // Create buffer allocation.
  Value alloc = createAllocationForTensor(
      rewriter, loc, allocTensorOp.getResult(), options, memorySpace);

  // Create bufferization.to_tensor with "restrict" and "writable". The
  // returned tensor is a new buffer allocation, so it does not alias with any
  // buffer.
  Value toTensorOp = rewriter.create<bufferization::ToTensorOp>(
      loc, alloc, /*restrict=*/true, /*writable=*/true);
  rewriter.replaceOp(allocTensorOp, toTensorOp);
  return alloc;
}

/// Lower tensor.from_elements to a sequence of chained tensor.insert.
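///
/// For example (an illustrative sketch with made-up SSA names):
///   %0 = tensor.from_elements %a, %b : tensor<2xindex>
/// is rewritten to:
///   %empty = tensor.empty() : tensor<2xindex>
///   %c0 = arith.constant 0 : index
///   %c1 = arith.constant 1 : index
///   %1 = tensor.insert %a into %empty[%c0] : tensor<2xindex>
///   %2 = tensor.insert %b into %1[%c1] : tensor<2xindex>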
FailureOr<Operation *> mlir::linalg::rewriteInDestinationPassingStyle(
    RewriterBase &rewriter, tensor::FromElementsOp fromElementsOp) {
  Location loc = fromElementsOp.getLoc();
  RankedTensorType tensorType =
      cast<RankedTensorType>(fromElementsOp.getType());
  auto shape = tensorType.getShape();

  // Create tensor.empty.
  auto emptyOp = rewriter.create<EmptyOp>(loc, tensorType, ValueRange());

  // Case: tensor<elem_type>.
  if (shape.empty()) {
    Operation *res = rewriter.replaceOpWithNewOp<tensor::InsertOp>(
        fromElementsOp, fromElementsOp.getElements().front(),
        emptyOp.getResult(), ValueRange());
    return res;
  }

  // Create constants for the range of possible indices [0, max{shape_i}).
  auto maxDim = *llvm::max_element(shape);
  SmallVector<Value, 2> constants;
  constants.reserve(maxDim);
  for (int i = 0; i < maxDim; ++i)
    constants.push_back(rewriter.create<arith::ConstantIndexOp>(loc, i));

  // Traverse all elements and create tensor.insert ops.
  auto elementIt = fromElementsOp.getElements().begin();
  SmallVector<Value, 2> indices(tensorType.getRank(), constants[0]);
  Value result = createInserts(rewriter, loc, /*dim=*/0, emptyOp.getResult(),
                               shape, constants, elementIt, indices);

  // Replace tensor.from_elements.
  rewriter.replaceOp(fromElementsOp, result);
  return result.getDefiningOp();
}

/// Lower tensor.generate to linalg.generic.
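///
/// For example (an illustrative sketch with made-up SSA names):
///   %0 = tensor.generate %n {
///   ^bb0(%i: index):
///     ...
///     tensor.yield %v : f32
///   } : tensor<?xf32>
/// is rewritten to a tensor.empty of the same shape plus a linalg.generic
/// whose body recomputes %v from linalg.index ops and yields it into the
/// destination.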
FailureOr<Operation *>
mlir::linalg::rewriteInDestinationPassingStyle(RewriterBase &rewriter,
                                               tensor::GenerateOp generateOp) {
  // Only ops with exactly one block are supported.
  if (!generateOp.getBody().hasOneBlock())
    return failure();

  Location loc = generateOp.getLoc();
  RankedTensorType tensorType = cast<RankedTensorType>(generateOp.getType());

  // Create tensor.empty.
  auto emptyOp =
      rewriter.create<EmptyOp>(loc, tensorType, generateOp.getDynamicExtents());

  // Create linalg.generic.
  SmallVector<utils::IteratorType> iteratorTypes(tensorType.getRank(),
                                                 utils::IteratorType::parallel);
  SmallVector<AffineMap> indexingMaps(
      1, rewriter.getMultiDimIdentityMap(tensorType.getRank()));
  auto genericOp = rewriter.create<linalg::GenericOp>(
      loc, tensorType, /*inputs=*/ValueRange(),
      /*outputs=*/ValueRange{emptyOp.getResult()}, /*indexingMaps=*/
      indexingMaps, iteratorTypes);
  Block *body = rewriter.createBlock(&genericOp->getRegion(0), {},
                                     tensorType.getElementType(), loc);
  rewriter.setInsertionPointToStart(body);
  SmallVector<Value> bbArgReplacements;
  for (int64_t i = 0; i < tensorType.getRank(); ++i)
    bbArgReplacements.push_back(rewriter.create<linalg::IndexOp>(loc, i));
  rewriter.mergeBlocks(&generateOp.getBody().front(), body, bbArgReplacements);

  // Update terminator.
  auto yieldOp = cast<tensor::YieldOp>(body->getTerminator());
  rewriter.replaceOpWithNewOp<linalg::YieldOp>(yieldOp, yieldOp.getValue());

  // Replace tensor.generate.
  rewriter.replaceOp(generateOp, genericOp->getResult(0));
  return genericOp.getOperation();
}

/// Lower tensor.pad to linalg.generic + tensor.insert_slice.
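///
/// For example (an illustrative sketch with made-up SSA names):
///   %0 = tensor.pad %src low[1] high[2] {
///   ^bb0(%i: index):
///     tensor.yield %cst : f32
///   } : tensor<10xf32> to tensor<13xf32>
/// is rewritten to:
///   %empty = tensor.empty() : tensor<13xf32>
///   %fill = linalg.fill ins(%cst : f32) outs(%empty : tensor<13xf32>)
///           -> tensor<13xf32>
///   %0 = tensor.insert_slice %src into %fill[1] [10] [1]
///           : tensor<10xf32> into tensor<13xf32>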
FailureOr<Operation *>
mlir::linalg::rewriteInDestinationPassingStyle(RewriterBase &rewriter,
                                               tensor::PadOp padOp) {
  // Only ops with exactly one block are supported.
  if (!padOp.getBodyRegion().hasOneBlock())
    return failure();

  // Create tensor.empty.
  Location loc = padOp.getLoc();
  RankedTensorType resultType = padOp.getResultType();
  ReifiedRankedShapedTypeDims reifiedShape;
  if (failed(reifyResultShapes(rewriter, padOp, reifiedShape)))
    return rewriter.notifyMatchFailure(
        padOp, "failed to reify tensor.pad op result shape");
  SmallVector<Value> dynamicSizes;
  for (int64_t i = 0; i < resultType.getRank(); ++i)
    if (resultType.isDynamicDim(i))
      dynamicSizes.push_back(cast<Value>(reifiedShape[0][i]));

  // If the `padOp` has a nofold attribute and all paddings are known to be 0,
  // explicitly insert a `linalg.copy`.
  if (padOp.getNofoldAttr() &&
      llvm::all_of(padOp.getMixedLowPad(), isZeroInteger) &&
      llvm::all_of(padOp.getMixedHighPad(), isZeroInteger)) {
    using bufferization::AllocTensorOp;
    Value allocated =
        rewriter.create<AllocTensorOp>(loc, resultType, dynamicSizes);
    auto copyOp = rewriter.replaceOpWithNewOp<linalg::CopyOp>(
        padOp, padOp.getSource(), allocated);
    return copyOp.getOperation();
  }

  Value empty = rewriter.create<EmptyOp>(loc, resultType, dynamicSizes);
  // Create linalg.fill or linalg.generic.
  Operation *fillOp = movePaddingToFillOrGenericOp(rewriter, loc, padOp, empty);
  rewriter.setInsertionPointAfter(fillOp);

  // Create tensor::InsertSliceOp.
  SmallVector<OpFoldResult> sliceSizes =
      getMixedSizes(rewriter, loc, padOp.getSource());
  SmallVector<OpFoldResult> sliceStrides(resultType.getRank(),
                                         rewriter.getIndexAttr(1));
  auto insertSliceOp = rewriter.replaceOpWithNewOp<tensor::InsertSliceOp>(
      padOp, padOp.getSource(), fillOp->getResult(0),
      /*offsets=*/padOp.getMixedLowPad(), sliceSizes, sliceStrides);
  return insertSliceOp.getOperation();
}

Value linalg::bufferizeToAllocation(
    RewriterBase &rewriter, const linalg::BufferizeToAllocationOptions &options,
    Operation *op, Attribute memorySpace, Operation *insertionPoint) {
  using namespace bufferization;

  // Call the specialized overloads for certain ops.
  if (auto padOp = dyn_cast<tensor::PadOp>(op))
    return bufferizeToAllocation(rewriter, options, padOp, memorySpace);
  if (auto maskOp = dyn_cast<vector::MaskOp>(op))
    return bufferizeToAllocation(rewriter, options, maskOp, memorySpace);
  if (auto allocTensorOp = dyn_cast<bufferization::AllocTensorOp>(op))
    return bufferizeToAllocation(rewriter, options, allocTensorOp, memorySpace);

  // Only bufferizable ops are supported.
  auto bufferizableOp = dyn_cast<BufferizableOpInterface>(op);
  if (!bufferizableOp)
    return nullptr;

  // Should the bufferization options and state be function arguments?
  BufferizationOptions bufferizationOptions;
  AnalysisState analysisState(bufferizationOptions);
  BufferizationState bufferizationState;

#ifndef NDEBUG
  if (!options.bufferizeDestinationOnly) {
    // Ops with nested tensor ops are not supported yet. At the moment, this
    // function just bufferizes the given op itself, but not its body.
    op->walk([&](Operation *nestedOp) {
      if (op == nestedOp)
        return;
      if (llvm::any_of(nestedOp->getOperands(),
                       [](Value v) { return isa<TensorType>(v.getType()); }))
        llvm_unreachable("ops with nested tensor ops are not supported yet");
      if (llvm::any_of(nestedOp->getResults(),
                       [](Value v) { return isa<TensorType>(v.getType()); }))
        llvm_unreachable("ops with nested tensor ops are not supported yet");
    });
  }
#endif // NDEBUG

  // Gather tensor results.
  SmallVector<OpResult> tensorResults;
  for (OpResult result : op->getResults()) {
    if (!isa<TensorType>(result.getType()))
      continue;
    // Unranked tensors are not supported.
    if (!isa<RankedTensorType>(result.getType()))
      return nullptr;
    // Ops that bufferize to an allocation are not supported.
    if (bufferizableOp.bufferizesToAllocation(result))
      return nullptr;
    tensorResults.push_back(result);
  }

  // Gather all operands that should bufferize to a new allocation, i.e.,
  // bufferize out-of-place.
  SmallVector<OpOperand *> outOfPlaceOperands, resultUses;
  auto addOutOfPlaceOperand = [&](OpOperand *operand) {
    if (!llvm::is_contained(outOfPlaceOperands, operand))
      outOfPlaceOperands.push_back(operand);
  };
  for (OpResult result : tensorResults) {
    AliasingOpOperandList aliasingOperands =
        analysisState.getAliasingOpOperands(result);
    for (const AliasingOpOperand &operand : aliasingOperands) {
      addOutOfPlaceOperand(operand.opOperand);
      for (OpOperand &resultUse : result.getUses())
        resultUses.push_back(&resultUse);
    }
  }
  for (OpOperand &operand : op->getOpOperands()) {
    if (!analysisState.bufferizesToMemoryWrite(operand))
      continue;
    if (!isa<RankedTensorType>(operand.get().getType()))
      continue;
    addOutOfPlaceOperand(&operand);
  }
  // TODO: Support multiple buffers.
  if (outOfPlaceOperands.size() != 1)
    return nullptr;

  // Allocate buffers.
  OpBuilder::InsertionGuard g(rewriter);
  rewriter.setInsertionPoint(insertionPoint ? insertionPoint : op);
  SmallVector<Value> allocs;
  for (OpOperand *operand : outOfPlaceOperands) {
    Value alloc = createAllocationForTensor(
        rewriter, op->getLoc(), operand->get(), options, memorySpace);
    allocs.push_back(alloc);
    if (!analysisState.findDefinitions(operand).empty()) {
      // Initialize the buffer with a copy of the operand data. Not needed if
      // the tensor is uninitialized.
      createMemcpy(rewriter, op->getLoc(), operand->get(), alloc, options);
    }
    rewriter.modifyOpInPlace(op, [&]() {
      auto toTensorOp = rewriter.create<ToTensorOp>(op->getLoc(), alloc);
      operand->set(toTensorOp);
      if (options.bufferizeDestinationOnly) {
        rewriter.modifyOpInPlace(toTensorOp, [&]() {
          toTensorOp.setRestrict(true);
          toTensorOp.setWritable(true);
        });
      }
    });
  }

  if (options.bufferizeDestinationOnly)
    return allocs.front();

  // Bufferize the op.
  rewriter.setInsertionPoint(op);
  if (failed(bufferizableOp.bufferize(rewriter, bufferizationOptions,
                                      bufferizationState)))
    return nullptr;

  // Set the "restrict" attribute, indicating that no other tensor aliases with
  // this tensor. That is because we just allocated a new buffer for the
  // tensor.
  for (OpOperand *resultUse : resultUses) {
    auto toTensorOp = resultUse->get().getDefiningOp<ToTensorOp>();
    assert(toTensorOp && "expected to_tensor op");
    rewriter.modifyOpInPlace(toTensorOp, [&]() {
      toTensorOp.setRestrict(true);
      toTensorOp.setWritable(true);
    });
  }
  return allocs.front();
}

namespace {

template <typename OpTy>
LogicalResult rewriteOpInDestinationPassingStyle(OpTy op,
                                                 PatternRewriter &rewriter) {
  return linalg::rewriteInDestinationPassingStyle(rewriter, op);
}

} // namespace

void linalg::populateConvertToDestinationStylePatterns(
    RewritePatternSet &patterns) {
  patterns.add(rewriteOpInDestinationPassingStyle<tensor::FromElementsOp>);
  patterns.add(rewriteOpInDestinationPassingStyle<tensor::GenerateOp>);
  patterns.add(rewriteOpInDestinationPassingStyle<tensor::PadOp>);
}