Utils.cpp source code [mlir/lib/Dialect/Affine/Analysis/Utils.cpp]

1	//===- Utils.cpp ---- Misc utilities for analysis -------------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements miscellaneous analysis routines for non-loop IR
10	// structures.
11	//
12	//===----------------------------------------------------------------------===//
13
14	#include "mlir/Dialect/Affine/Analysis/Utils.h"
15	#include "mlir/Analysis/Presburger/PresburgerRelation.h"
16	#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
17	#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
18	#include "mlir/Dialect/Affine/IR/AffineOps.h"
19	#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
20	#include "mlir/Dialect/Arith/IR/Arith.h"
21	#include "mlir/Dialect/Utils/StaticValueUtils.h"
22	#include "mlir/IR/IntegerSet.h"
23	#include "mlir/Interfaces/CallInterfaces.h"
24	#include "llvm/ADT/SetVector.h"
25	#include "llvm/ADT/SmallPtrSet.h"
26	#include "llvm/Support/Debug.h"
27	#include "llvm/Support/raw_ostream.h"
28	#include <optional>
29
30	#define DEBUG_TYPE "analysis-utils"
31
32	using namespace mlir;
33	using namespace affine;
34	using namespace presburger;
35
36	using llvm::SmallDenseMap;
37
38	using Node = MemRefDependenceGraph::Node;
39
40	// LoopNestStateCollector walks loop nests and collects load and store
41	// operations, and whether or not a region holding op other than ForOp and IfOp
42	// was encountered in the loop nest.
43	void LoopNestStateCollector::collect(Operation *opToWalk) {
44	opToWalk->walk(callback: [&](Operation *op) {
45	if (isa<AffineForOp>(Val: op))
46	forOps.push_back(cast<AffineForOp>(op));
47	else if (op->getNumRegions() != `0` && !isa<AffineIfOp>(Val: op))
48	hasNonAffineRegionOp = true;
49	else if (isa<AffineReadOpInterface>(op))
50	loadOpInsts.push_back(Elt: op);
51	else if (isa<AffineWriteOpInterface>(op))
52	storeOpInsts.push_back(Elt: op);
53	});
54	}
55
56	// Returns the load op count for 'memref'.
57	unsigned Node::getLoadOpCount(Value memref) const {
58	unsigned loadOpCount = `0`;
59	for (Operation *loadOp : loads) {
60	if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
61	++loadOpCount;
62	}
63	return loadOpCount;
64	}
65
66	// Returns the store op count for 'memref'.
67	unsigned Node::getStoreOpCount(Value memref) const {
68	unsigned storeOpCount = `0`;
69	for (Operation *storeOp : stores) {
70	if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
71	++storeOpCount;
72	}
73	return storeOpCount;
74	}
75
76	// Returns all store ops in 'storeOps' which access 'memref'.
77	void Node::getStoreOpsForMemref(Value memref,
78	SmallVectorImpl<Operation > storeOps) const {
79	for (Operation *storeOp : stores) {
80	if (memref == cast<AffineWriteOpInterface>(storeOp).getMemRef())
81	storeOps->push_back(Elt: storeOp);
82	}
83	}
84
85	// Returns all load ops in 'loadOps' which access 'memref'.
86	void Node::getLoadOpsForMemref(Value memref,
87	SmallVectorImpl<Operation > loadOps) const {
88	for (Operation *loadOp : loads) {
89	if (memref == cast<AffineReadOpInterface>(loadOp).getMemRef())
90	loadOps->push_back(Elt: loadOp);
91	}
92	}
93
94	// Returns all memrefs in 'loadAndStoreMemrefSet' for which this node
95	// has at least one load and store operation.
96	void Node::getLoadAndStoreMemrefSet(
97	DenseSet<Value> loadAndStoreMemrefSet) const* {
98	llvm::SmallDenseSet<Value, `2`> loadMemrefs;
99	for (Operation *loadOp : loads) {
100	loadMemrefs.insert(cast<AffineReadOpInterface>(loadOp).getMemRef());
101	}
102	for (Operation *storeOp : stores) {
103	auto memref = cast<AffineWriteOpInterface>(storeOp).getMemRef();
104	if (loadMemrefs.count(V: memref) > `0`)
105	loadAndStoreMemrefSet->insert(memref);
106	}
107	}
108
109	// Initializes the data dependence graph by walking operations in `block`.
110	// Assigns each node in the graph a node id based on program order in 'f'.
111	bool MemRefDependenceGraph::init() {
112	LLVM_DEBUG(llvm::dbgs() << "--- Initializing MDG ---\n");
113	// Map from a memref to the set of ids of the nodes that have ops accessing
114	// the memref.
115	DenseMap<Value, SetVector<unsigned>> memrefAccesses;
116
117	DenseMap<Operation , unsigned*> forToNodeMap;
118	for (Operation &op : block) {
119	if (dyn_cast<AffineForOp>(op)) {
120	// Create graph node 'id' to represent top-level 'forOp' and record
121	// all loads and store accesses it contains.
122	LoopNestStateCollector collector;
123	collector.collect(opToWalk: &op);
124	// Return false if a region holding op other than 'affine.for' and
125	// 'affine.if' was found (not currently supported).
126	if (collector.hasNonAffineRegionOp)
127	return false;
128	Node node(nextNodeId++, &op);
129	for (auto *opInst : collector.loadOpInsts) {
130	node.loads.push_back(Elt: opInst);
131	auto memref = cast<AffineReadOpInterface>(opInst).getMemRef();
132	memrefAccesses[memref].insert(node.id);
133	}
134	for (auto *opInst : collector.storeOpInsts) {
135	node.stores.push_back(Elt: opInst);
136	auto memref = cast<AffineWriteOpInterface>(opInst).getMemRef();
137	memrefAccesses[memref].insert(node.id);
138	}
139	forToNodeMap [&op] = node.id;
140	nodes.insert(KV: {node.id, node});
141	} else if (dyn_cast<AffineReadOpInterface>(op)) {
142	// Create graph node for top-level load op.
143	Node node(nextNodeId++, &op);
144	node.loads.push_back(Elt: &op);
145	auto memref = cast<AffineReadOpInterface>(op).getMemRef();
146	memrefAccesses[memref].insert(node.id);
147	nodes.insert(KV: {node.id, node});
148	} else if (dyn_cast<AffineWriteOpInterface>(op)) {
149	// Create graph node for top-level store op.
150	Node node(nextNodeId++, &op);
151	node.stores.push_back(Elt: &op);
152	auto memref = cast<AffineWriteOpInterface>(op).getMemRef();
153	memrefAccesses[memref].insert(node.id);
154	nodes.insert(KV: {node.id, node});
155	} else if (op.getNumResults() > `0` && !op.use_empty()) {
156	// Create graph node for top-level producer of SSA values, which
157	// could be used by loop nest nodes.
158	Node node(nextNodeId++, &op);
159	nodes.insert(KV: {node.id, node});
160	} else if (!isMemoryEffectFree(op: &op) &&
161	(op.getNumRegions() == `0` \|\| isa<RegionBranchOpInterface>(Val: op))) {
162	// Create graph node for top-level op unless it is known to be
163	// memory-effect free. This covers all unknown/unregistered ops,
164	// non-affine ops with memory effects, and region-holding ops with a
165	// well-defined control flow. During the fusion validity checks, we look
166	// for non-affine ops on the path from source to destination, at which
167	// point we check which memrefs if any are used in the region.
168	Node node(nextNodeId++, &op);
169	nodes.insert(KV: {node.id, node});
170	} else if (op.getNumRegions() != `0`) {
171	// Return false if non-handled/unknown region-holding ops are found. We
172	// won't know what such ops do or what its regions mean; for e.g., it may
173	// not be an imperative op.
174	LLVM_DEBUG(llvm::dbgs()
175	<< "MDG init failed; unknown region-holding op found!\n");
176	return false;
177	}
178	}
179
180	for (auto &idAndNode : nodes) {
181	LLVM_DEBUG(llvm::dbgs() << "Create node " << idAndNode.first << " for:\n"
182	<< *(idAndNode.second.op) << "\n");
183	(void)idAndNode;
184	}
185
186	// Add dependence edges between nodes which produce SSA values and their
187	// users. Load ops can be considered as the ones producing SSA values.
188	for (auto &idAndNode : nodes) {
189	const Node &node = idAndNode.second;
190	// Stores don't define SSA values, skip them.
191	if (!node.stores.empty())
192	continue;
193	Operation *opInst = node.op;
194	for (Value value : opInst->getResults()) {
195	for (Operation *user : value.getUsers()) {
196	// Ignore users outside of the block.
197	if (block.getParent()->findAncestorOpInRegion(op&: *user)->getBlock() !=
198	&block)
199	continue;
200	SmallVector<AffineForOp, `4`> loops;
201	getAffineForIVs(*user, &loops);
202	// Find the surrounding affine.for nested immediately within the
203	// block.
204	auto *it = llvm::find_if(loops, [&](AffineForOp loop) {
205	return loop->getBlock() == &block;
206	});
207	if (it == loops.end())
208	continue;
209	assert(forToNodeMap.count(*it) > `0` && "missing mapping");
210	unsigned userLoopNestId = forToNodeMap[*it];
211	addEdge(srcId: node.id, dstId: userLoopNestId, value);
212	}
213	}
214	}
215
216	// Walk memref access lists and add graph edges between dependent nodes.
217	for (auto &memrefAndList : memrefAccesses) {
218	unsigned n = memrefAndList.second.size();
219	for (unsigned i = `0`; i < n; ++i) {
220	unsigned srcId = memrefAndList.second [i];
221	bool srcHasStore =
222	getNode(id: srcId)->getStoreOpCount(memref: memrefAndList.first) > `0`;
223	for (unsigned j = i + `1`; j < n; ++j) {
224	unsigned dstId = memrefAndList.second [j];
225	bool dstHasStore =
226	getNode(id: dstId)->getStoreOpCount(memref: memrefAndList.first) > `0`;
227	if (srcHasStore \|\| dstHasStore)
228	addEdge(srcId, dstId, value: memrefAndList.first);
229	}
230	}
231	}
232	return true;
233	}
234
235	// Returns the graph node for 'id'.
236	Node MemRefDependenceGraph::getNode(unsigned* id) {
237	auto it = nodes.find(Val: id);
238	assert(it != nodes.end());
239	return &it ->second;
240	}
241
242	// Returns the graph node for 'forOp'.
243	Node *MemRefDependenceGraph::getForOpNode(AffineForOp forOp) {
244	for (auto &idAndNode : nodes)
245	if (idAndNode.second.op == forOp)
246	return &idAndNode.second;
247	return nullptr;
248	}
249
250	// Adds a node with 'op' to the graph and returns its unique identifier.
251	unsigned MemRefDependenceGraph::addNode(Operation *op) {
252	Node node(nextNodeId++, op);
253	nodes.insert(KV: {node.id, node});
254	return node.id;
255	}
256
257	// Remove node 'id' (and its associated edges) from graph.
258	void MemRefDependenceGraph::removeNode(unsigned id) {
259	// Remove each edge in 'inEdges[id]'.
260	if (inEdges.count(Val: id) > `0`) {
261	SmallVector<Edge, `2`> oldInEdges = inEdges [id];
262	for (auto &inEdge : oldInEdges) {
263	removeEdge(srcId: inEdge.id, dstId: id, value: inEdge.value);
264	}
265	}
266	// Remove each edge in 'outEdges[id]'.
267	if (outEdges.count(Val: id) > `0`) {
268	SmallVector<Edge, `2`> oldOutEdges = outEdges [id];
269	for (auto &outEdge : oldOutEdges) {
270	removeEdge(srcId: id, dstId: outEdge.id, value: outEdge.value);
271	}
272	}
273	// Erase remaining node state.
274	inEdges.erase(Val: id);
275	outEdges.erase(Val: id);
276	nodes.erase(Val: id);
277	}
278
279	// Returns true if node 'id' writes to any memref which escapes (or is an
280	// argument to) the block. Returns false otherwise.
281	bool MemRefDependenceGraph::writesToLiveInOrEscapingMemrefs(unsigned id) {
282	Node *node = getNode(id);
283	for (auto *storeOpInst : node->stores) {
284	auto memref = cast<AffineWriteOpInterface>(storeOpInst).getMemRef();
285	auto *op = memref.getDefiningOp();
286	// Return true if 'memref' is a block argument.
287	if (!op)
288	return true;
289	// Return true if any use of 'memref' does not deference it in an affine
290	// way.
291	for (auto *user : memref.getUsers())
292	if (!isa<AffineMapAccessInterface>(*user))
293	return true;
294	}
295	return false;
296	}
297
298	// Returns true iff there is an edge from node 'srcId' to node 'dstId' which
299	// is for 'value' if non-null, or for any value otherwise. Returns false
300	// otherwise.
301	bool MemRefDependenceGraph::hasEdge(unsigned srcId, unsigned dstId,
302	Value value) {
303	if (outEdges.count(Val: srcId) == `0` \|\| inEdges.count(Val: dstId) == `0`) {
304	return false;
305	}
306	bool hasOutEdge = llvm::any_of(Range&: outEdges [srcId], P: [=](Edge &edge) {
307	return edge.id == dstId && (!value \|\| edge.value == value);
308	});
309	bool hasInEdge = llvm::any_of(Range&: inEdges [dstId], P: [=](Edge &edge) {
310	return edge.id == srcId && (!value \|\| edge.value == value);
311	});
312	return hasOutEdge && hasInEdge;
313	}
314
315	// Adds an edge from node 'srcId' to node 'dstId' for 'value'.
316	void MemRefDependenceGraph::addEdge(unsigned srcId, unsigned dstId,
317	Value value) {
318	if (!hasEdge(srcId, dstId, value)) {
319	outEdges [srcId].push_back(Elt: {.id: dstId, .value: value});
320	inEdges [dstId].push_back(Elt: {.id: srcId, .value: value});
321	if (isa<MemRefType>(Val: value.getType()))
322	memrefEdgeCount [value]++;
323	}
324	}
325
326	// Removes an edge from node 'srcId' to node 'dstId' for 'value'.
327	void MemRefDependenceGraph::removeEdge(unsigned srcId, unsigned dstId,
328	Value value) {
329	assert(inEdges.count(dstId) > `0`);
330	assert(outEdges.count(srcId) > `0`);
331	if (isa<MemRefType>(Val: value.getType())) {
332	assert(memrefEdgeCount.count(value) > `0`);
333	memrefEdgeCount [value]--;
334	}
335	// Remove 'srcId' from 'inEdges[dstId]'.
336	for (auto *it = inEdges [dstId].begin(); it != inEdges [dstId].end(); ++it) {
337	if ((it).id == srcId && (it).value == value) {
338	inEdges [dstId].erase(CI: it);
339	break;
340	}
341	}
342	// Remove 'dstId' from 'outEdges[srcId]'.
343	for (auto *it = outEdges [srcId].begin(); it != outEdges [srcId].end(); ++it) {
344	if ((it).id == dstId && (it).value == value) {
345	outEdges [srcId].erase(CI: it);
346	break;
347	}
348	}
349	}
350
351	// Returns true if there is a path in the dependence graph from node 'srcId'
352	// to node 'dstId'. Returns false otherwise. `srcId`, `dstId`, and the
353	// operations that the edges connected are expected to be from the same block.
354	bool MemRefDependenceGraph::hasDependencePath(unsigned srcId, unsigned dstId) {
355	// Worklist state is: <node-id, next-output-edge-index-to-visit>
356	SmallVector<std::pair<unsigned, unsigned>, `4`> worklist;
357	worklist.push_back(Elt: {srcId, `0`});
358	Operation *dstOp = getNode(id: dstId)->op;
359	// Run DFS traversal to see if 'dstId' is reachable from 'srcId'.
360	while (!worklist.empty()) {
361	auto &idAndIndex = worklist.back();
362	// Return true if we have reached 'dstId'.
363	if (idAndIndex.first == dstId)
364	return true;
365	// Pop and continue if node has no out edges, or if all out edges have
366	// already been visited.
367	if (outEdges.count(Val: idAndIndex.first) == `0` \|\|
368	idAndIndex.second == outEdges [idAndIndex.first].size()) {
369	worklist.pop_back();
370	continue;
371	}
372	// Get graph edge to traverse.
373	Edge edge = outEdges [idAndIndex.first][idAndIndex.second];
374	// Increment next output edge index for 'idAndIndex'.
375	++idAndIndex.second;
376	// Add node at 'edge.id' to the worklist. We don't need to consider
377	// nodes that are "after" dstId in the containing block; one can't have a
378	// path to `dstId` from any of those nodes.
379	bool afterDst = dstOp->isBeforeInBlock(other: getNode(id: edge.id)->op);
380	if (!afterDst && edge.id != idAndIndex.first)
381	worklist.push_back(Elt: {edge.id, `0`});
382	}
383	return false;
384	}
385
386	// Returns the input edge count for node 'id' and 'memref' from src nodes
387	// which access 'memref' with a store operation.
388	unsigned MemRefDependenceGraph::getIncomingMemRefAccesses(unsigned id,
389	Value memref) {
390	unsigned inEdgeCount = `0`;
391	if (inEdges.count(Val: id) > `0`)
392	for (auto &inEdge : inEdges [id])
393	if (inEdge.value == memref) {
394	Node *srcNode = getNode(id: inEdge.id);
395	// Only count in edges from 'srcNode' if 'srcNode' accesses 'memref'
396	if (srcNode->getStoreOpCount(memref) > `0`)
397	++inEdgeCount;
398	}
399	return inEdgeCount;
400	}
401
402	// Returns the output edge count for node 'id' and 'memref' (if non-null),
403	// otherwise returns the total output edge count from node 'id'.
404	unsigned MemRefDependenceGraph::getOutEdgeCount(unsigned id, Value memref) {
405	unsigned outEdgeCount = `0`;
406	if (outEdges.count(Val: id) > `0`)
407	for (auto &outEdge : outEdges [id])
408	if (!memref \|\| outEdge.value == memref)
409	++outEdgeCount;
410	return outEdgeCount;
411	}
412
413	/// Return all nodes which define SSA values used in node 'id'.
414	void MemRefDependenceGraph::gatherDefiningNodes(
415	unsigned id, DenseSet<unsigned> &definingNodes) {
416	for (MemRefDependenceGraph::Edge edge : inEdges [id])
417	// By definition of edge, if the edge value is a non-memref value,
418	// then the dependence is between a graph node which defines an SSA value
419	// and another graph node which uses the SSA value.
420	if (!isa<MemRefType>(Val: edge.value.getType()))
421	definingNodes.insert(V: edge.id);
422	}
423
424	// Computes and returns an insertion point operation, before which the
425	// the fused <srcId, dstId> loop nest can be inserted while preserving
426	// dependences. Returns nullptr if no such insertion point is found.
427	Operation *
428	MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
429	unsigned dstId) {
430	if (outEdges.count(Val: srcId) == `0`)
431	return getNode(id: dstId)->op;
432
433	// Skip if there is any defining node of 'dstId' that depends on 'srcId'.
434	DenseSet<unsigned> definingNodes;
435	gatherDefiningNodes(id: dstId, definingNodes);
436	if (llvm::any_of(Range&: definingNodes,
437	P: [&](unsigned id) { return hasDependencePath(srcId, dstId: id); })) {
438	LLVM_DEBUG(llvm::dbgs()
439	<< "Can't fuse: a defining op with a user in the dst "
440	"loop has dependence from the src loop\n");
441	return nullptr;
442	}
443
444	// Build set of insts in range (srcId, dstId) which depend on 'srcId'.
445	SmallPtrSet<Operation *, `2`> srcDepInsts;
446	for (auto &outEdge : outEdges [srcId])
447	if (outEdge.id != dstId)
448	srcDepInsts.insert(Ptr: getNode(id: outEdge.id)->op);
449
450	// Build set of insts in range (srcId, dstId) on which 'dstId' depends.
451	SmallPtrSet<Operation *, `2`> dstDepInsts;
452	for (auto &inEdge : inEdges [dstId])
453	if (inEdge.id != srcId)
454	dstDepInsts.insert(Ptr: getNode(id: inEdge.id)->op);
455
456	Operation *srcNodeInst = getNode(id: srcId)->op;
457	Operation *dstNodeInst = getNode(id: dstId)->op;
458
459	// Computing insertion point:
460	// ) Walk all operation positions in Block operation list in the*
461	// range (src, dst). For each operation 'op' visited in this search:
462	// ) Store in 'firstSrcDepPos' the first position where 'op' has a*
463	// dependence edge from 'srcNode'.
464	// ) Store in 'lastDstDepPost' the last position where 'op' has a*
465	// dependence edge to 'dstNode'.
466	// ) Compare 'firstSrcDepPos' and 'lastDstDepPost' to determine the*
467	// operation insertion point (or return null pointer if no such
468	// insertion point exists: 'firstSrcDepPos' <= 'lastDstDepPos').
469	SmallVector<Operation *, `2`> depInsts;
470	std::optional<unsigned> firstSrcDepPos;
471	std::optional<unsigned> lastDstDepPos;
472	unsigned pos = `0`;
473	for (Block::iterator it = std::next(x: Block::iterator (srcNodeInst));
474	it != Block::iterator (dstNodeInst); ++it) {
475	Operation op = &(it);
476	if (srcDepInsts.count(Ptr: op) > `0` && firstSrcDepPos == std::nullopt)
477	firstSrcDepPos = pos;
478	if (dstDepInsts.count(Ptr: op) > `0`)
479	lastDstDepPos = pos;
480	depInsts.push_back(Elt: op);
481	++pos;
482	}
483
484	if (firstSrcDepPos.has_value()) {
485	if (lastDstDepPos.has_value()) {
486	if (firstSrcDepPos <= lastDstDepPos) {
487	// No valid insertion point exists which preserves dependences.
488	return nullptr;
489	}
490	}
491	// Return the insertion point at 'firstSrcDepPos'.
492	return depInsts [*firstSrcDepPos];
493	}
494	// No dependence targets in range (or only dst deps in range), return
495	// 'dstNodInst' insertion point.
496	return dstNodeInst;
497	}
498
499	// Updates edge mappings from node 'srcId' to node 'dstId' after fusing them,
500	// taking into account that:
501	// ) if 'removeSrcId' is true, 'srcId' will be removed after fusion,*
502	// ) memrefs in 'privateMemRefs' has been replaced in node at 'dstId' by a*
503	// private memref.
504	void MemRefDependenceGraph::updateEdges(unsigned srcId, unsigned dstId,
505	const DenseSet<Value> &privateMemRefs,
506	bool removeSrcId) {
507	// For each edge in 'inEdges[srcId]': add new edge remapping to 'dstId'.
508	if (inEdges.count(Val: srcId) > `0`) {
509	SmallVector<Edge, `2`> oldInEdges = inEdges [srcId];
510	for (auto &inEdge : oldInEdges) {
511	// Add edge from 'inEdge.id' to 'dstId' if it's not a private memref.
512	if (privateMemRefs.count(V: inEdge.value) == `0`)
513	addEdge(srcId: inEdge.id, dstId, value: inEdge.value);
514	}
515	}
516	// For each edge in 'outEdges[srcId]': remove edge from 'srcId' to 'dstId'.
517	// If 'srcId' is going to be removed, remap all the out edges to 'dstId'.
518	if (outEdges.count(Val: srcId) > `0`) {
519	SmallVector<Edge, `2`> oldOutEdges = outEdges [srcId];
520	for (auto &outEdge : oldOutEdges) {
521	// Remove any out edges from 'srcId' to 'dstId' across memrefs.
522	if (outEdge.id == dstId)
523	removeEdge(srcId, dstId: outEdge.id, value: outEdge.value);
524	else if (removeSrcId) {
525	addEdge(srcId: dstId, dstId: outEdge.id, value: outEdge.value);
526	removeEdge(srcId, dstId: outEdge.id, value: outEdge.value);
527	}
528	}
529	}
530	// Remove any edges in 'inEdges[dstId]' on 'oldMemRef' (which is being
531	// replaced by a private memref). These edges could come from nodes
532	// other than 'srcId' which were removed in the previous step.
533	if (inEdges.count(Val: dstId) > `0` && !privateMemRefs.empty()) {
534	SmallVector<Edge, `2`> oldInEdges = inEdges [dstId];
535	for (auto &inEdge : oldInEdges)
536	if (privateMemRefs.count(V: inEdge.value) > `0`)
537	removeEdge(srcId: inEdge.id, dstId, value: inEdge.value);
538	}
539	}
540
541	// Update edge mappings for nodes 'sibId' and 'dstId' to reflect fusion
542	// of sibling node 'sibId' into node 'dstId'.
543	void MemRefDependenceGraph::updateEdges(unsigned sibId, unsigned dstId) {
544	// For each edge in 'inEdges[sibId]':
545	// ) Add new edge from source node 'inEdge.id' to 'dstNode'.*
546	// ) Remove edge from source node 'inEdge.id' to 'sibNode'.*
547	if (inEdges.count(Val: sibId) > `0`) {
548	SmallVector<Edge, `2`> oldInEdges = inEdges [sibId];
549	for (auto &inEdge : oldInEdges) {
550	addEdge(srcId: inEdge.id, dstId, value: inEdge.value);
551	removeEdge(srcId: inEdge.id, dstId: sibId, value: inEdge.value);
552	}
553	}
554
555	// For each edge in 'outEdges[sibId]' to node 'id'
556	// ) Add new edge from 'dstId' to 'outEdge.id'.*
557	// ) Remove edge from 'sibId' to 'outEdge.id'.*
558	if (outEdges.count(Val: sibId) > `0`) {
559	SmallVector<Edge, `2`> oldOutEdges = outEdges [sibId];
560	for (auto &outEdge : oldOutEdges) {
561	addEdge(srcId: dstId, dstId: outEdge.id, value: outEdge.value);
562	removeEdge(srcId: sibId, dstId: outEdge.id, value: outEdge.value);
563	}
564	}
565	}
566
567	// Adds ops in 'loads' and 'stores' to node at 'id'.
568	void MemRefDependenceGraph::addToNode(
569	unsigned id, const SmallVectorImpl<Operation *> &loads,
570	const SmallVectorImpl<Operation *> &stores) {
571	Node *node = getNode(id);
572	llvm::append_range(C&: node->loads, R: loads);
573	llvm::append_range(C&: node->stores, R: stores);
574	}
575
576	void MemRefDependenceGraph::clearNodeLoadAndStores(unsigned id) {
577	Node *node = getNode(id);
578	node->loads.clear();
579	node->stores.clear();
580	}
581
582	// Calls 'callback' for each input edge incident to node 'id' which carries a
583	// memref dependence.
584	void MemRefDependenceGraph::forEachMemRefInputEdge(
585	unsigned id, const std::function<void(Edge)> &callback) {
586	if (inEdges.count(Val: id) > `0`)
587	forEachMemRefEdge(edges: inEdges [id], callback);
588	}
589
590	// Calls 'callback' for each output edge from node 'id' which carries a
591	// memref dependence.
592	void MemRefDependenceGraph::forEachMemRefOutputEdge(
593	unsigned id, const std::function<void(Edge)> &callback) {
594	if (outEdges.count(Val: id) > `0`)
595	forEachMemRefEdge(edges: outEdges [id], callback);
596	}
597
598	// Calls 'callback' for each edge in 'edges' which carries a memref
599	// dependence.
600	void MemRefDependenceGraph::forEachMemRefEdge(
601	ArrayRef<Edge> edges, const std::function<void(Edge)> &callback) {
602	for (const auto &edge : edges) {
603	// Skip if 'edge' is not a memref dependence edge.
604	if (!isa<MemRefType>(Val: edge.value.getType()))
605	continue;
606	assert(nodes.count(edge.id) > `0`);
607	// Skip if 'edge.id' is not a loop nest.
608	if (!isa<AffineForOp>(Val: getNode(id: edge.id)->op))
609	continue;
610	// Visit current input edge 'edge'.
611	callback (edge);
612	}
613	}
614
615	void MemRefDependenceGraph::print(raw_ostream &os) const {
616	os << "\nMemRefDependenceGraph\n";
617	os << "\nNodes:\n";
618	for (const auto &idAndNode : nodes) {
619	os << "Node: " << idAndNode.first << "\n";
620	auto it = inEdges.find(Val: idAndNode.first);
621	if (it != inEdges.end()) {
622	for (const auto &e : it ->second)
623	os << " InEdge: " << e.id << " " << e.value << "\n";
624	}
625	it = outEdges.find(Val: idAndNode.first);
626	if (it != outEdges.end()) {
627	for (const auto &e : it ->second)
628	os << " OutEdge: " << e.id << " " << e.value << "\n";
629	}
630	}
631	}
632
633	void mlir::affine::getAffineForIVs(Operation &op,
634	SmallVectorImpl<AffineForOp> *loops) {
635	auto *currOp = op.getParentOp();
636	AffineForOp currAffineForOp;
637	// Traverse up the hierarchy collecting all 'affine.for' operation while
638	// skipping over 'affine.if' operations.
639	while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
640	if (auto currAffineForOp = dyn_cast<AffineForOp>(currOp))
641	loops->push_back(currAffineForOp);
642	currOp = currOp->getParentOp();
643	}
644	std::reverse(loops->begin(), loops->end());
645	}
646
647	void mlir::affine::getEnclosingAffineOps(Operation &op,
648	SmallVectorImpl<Operation > ops) {
649	ops->clear();
650	Operation *currOp = op.getParentOp();
651
652	// Traverse up the hierarchy collecting all `affine.for`, `affine.if`, and
653	// affine.parallel operations.
654	while (currOp && !currOp->hasTrait<OpTrait::AffineScope>()) {
655	if (isa<AffineIfOp, AffineForOp, AffineParallelOp>(Val: currOp))
656	ops->push_back(Elt: currOp);
657	currOp = currOp->getParentOp();
658	}
659	std::reverse(first: ops->begin(), last: ops->end());
660	}
661
662	// Populates 'cst' with FlatAffineValueConstraints which represent original
663	// domain of the loop bounds that define 'ivs'.
664	LogicalResult ComputationSliceState::getSourceAsConstraints(
665	FlatAffineValueConstraints &cst) const {
666	assert(!ivs.empty() && "Cannot have a slice without its IVs");
667	cst = FlatAffineValueConstraints (/numDims=/ivs.size(), /numSymbols=/`0`,
668	/numLocals=/`0`, ivs);
669	for (Value iv : ivs) {
670	AffineForOp loop = getForInductionVarOwner(iv);
671	assert(loop && "Expected affine for");
672	if (failed(cst.addAffineForOpDomain(forOp: loop)))
673	return failure();
674	}
675	return success();
676	}
677
678	// Populates 'cst' with FlatAffineValueConstraints which represent slice bounds.
679	LogicalResult
680	ComputationSliceState::getAsConstraints(FlatAffineValueConstraints cst) const* {
681	assert(!lbOperands.empty());
682	// Adds src 'ivs' as dimension variables in 'cst'.
683	unsigned numDims = ivs.size();
684	// Adds operands (dst ivs and symbols) as symbols in 'cst'.
685	unsigned numSymbols = lbOperands [`0`].size();
686
687	SmallVector<Value, `4`> values(ivs);
688	// Append 'ivs' then 'operands' to 'values'.
689	values.append(in_start: lbOperands [`0`].begin(), in_end: lbOperands [`0`].end());
690	*cst = FlatAffineValueConstraints (numDims, numSymbols, `0`, values);
691
692	// Add loop bound constraints for values which are loop IVs of the destination
693	// of fusion and equality constraints for symbols which are constants.
694	for (unsigned i = numDims, end = values.size(); i < end; ++i) {
695	Value value = values [i];
696	assert(cst->containsVar(value) && "value expected to be present");
697	if (isValidSymbol(value)) {
698	// Check if the symbol is a constant.
699	if (std::optional<int64_t> cOp = getConstantIntValue(ofr: value))
700	cst->addBound(type: BoundType::EQ, val: value, value: cOp.value());
701	} else if (auto loop = getForInductionVarOwner(value)) {
702	if (failed(cst->addAffineForOpDomain(forOp: loop)))
703	return failure();
704	}
705	}
706
707	// Add slices bounds on 'ivs' using maps 'lbs'/'ubs' with 'lbOperands[0]'
708	LogicalResult ret = cst->addSliceBounds(values: ivs, lbMaps: lbs, ubMaps: ubs, operands: lbOperands [`0`]);
709	assert(succeeded(ret) &&
710	"should not fail as we never have semi-affine slice maps");
711	(void)ret;
712	return success();
713	}
714
715	// Clears state bounds and operand state.
716	void ComputationSliceState::clearBounds() {
717	lbs.clear();
718	ubs.clear();
719	lbOperands.clear();
720	ubOperands.clear();
721	}
722
723	void ComputationSliceState::dump() const {
724	llvm::errs() << "\tIVs:\n";
725	for (Value iv : ivs)
726	llvm::errs() << "\t\t" << iv << "\n";
727
728	llvm::errs() << "\tLBs:\n";
729	for (auto en : llvm::enumerate(First: lbs)) {
730	llvm::errs() << "\t\t" << en.value() << "\n";
731	llvm::errs() << "\t\tOperands:\n";
732	for (Value lbOp : lbOperands [en.index()])
733	llvm::errs() << "\t\t\t" << lbOp << "\n";
734	}
735
736	llvm::errs() << "\tUBs:\n";
737	for (auto en : llvm::enumerate(First: ubs)) {
738	llvm::errs() << "\t\t" << en.value() << "\n";
739	llvm::errs() << "\t\tOperands:\n";
740	for (Value ubOp : ubOperands [en.index()])
741	llvm::errs() << "\t\t\t" << ubOp << "\n";
742	}
743	}
744
745	/// Fast check to determine if the computation slice is maximal. Returns true if
746	/// each slice dimension maps to an existing dst dimension and both the src
747	/// and the dst loops for those dimensions have the same bounds. Returns false
748	/// if both the src and the dst loops don't have the same bounds. Returns
749	/// std::nullopt if none of the above can be proven.
750	std::optional<bool> ComputationSliceState::isSliceMaximalFastCheck() const {
751	assert(lbs.size() == ubs.size() && !lbs.empty() && !ivs.empty() &&
752	"Unexpected number of lbs, ubs and ivs in slice");
753
754	for (unsigned i = `0`, end = lbs.size(); i < end; ++i) {
755	AffineMap lbMap = lbs [i];
756	AffineMap ubMap = ubs [i];
757
758	// Check if this slice is just an equality along this dimension.
759	if (!lbMap \|\| !ubMap \|\| lbMap.getNumResults() != `1` \|\|
760	ubMap.getNumResults() != `1` \|\|
761	lbMap.getResult(idx: `0`) + `1` != ubMap.getResult(idx: `0`) \|\|
762	// The condition above will be true for maps describing a single
763	// iteration (e.g., lbMap.getResult(0) = 0, ubMap.getResult(0) = 1).
764	// Make sure we skip those cases by checking that the lb result is not
765	// just a constant.
766	isa<AffineConstantExpr>(Val: lbMap.getResult(idx: `0`)))
767	return std::nullopt;
768
769	// Limited support: we expect the lb result to be just a loop dimension for
770	// now.
771	AffineDimExpr result = dyn_cast<AffineDimExpr>(Val: lbMap.getResult(idx: `0`));
772	if (!result)
773	return std::nullopt;
774
775	// Retrieve dst loop bounds.
776	AffineForOp dstLoop =
777	getForInductionVarOwner(lbOperands[i][result.getPosition()]);
778	if (!dstLoop)
779	return std::nullopt;
780	AffineMap dstLbMap = dstLoop.getLowerBoundMap();
781	AffineMap dstUbMap = dstLoop.getUpperBoundMap();
782
783	// Retrieve src loop bounds.
784	AffineForOp srcLoop = getForInductionVarOwner(ivs[i]);
785	assert(srcLoop && "Expected affine for");
786	AffineMap srcLbMap = srcLoop.getLowerBoundMap();
787	AffineMap srcUbMap = srcLoop.getUpperBoundMap();
788
789	// Limited support: we expect simple src and dst loops with a single
790	// constant component per bound for now.
791	if (srcLbMap.getNumResults() != `1` \|\| srcUbMap.getNumResults() != `1` \|\|
792	dstLbMap.getNumResults() != `1` \|\| dstUbMap.getNumResults() != `1`)
793	return std::nullopt;
794
795	AffineExpr srcLbResult = srcLbMap.getResult(idx: `0`);
796	AffineExpr dstLbResult = dstLbMap.getResult(idx: `0`);
797	AffineExpr srcUbResult = srcUbMap.getResult(idx: `0`);
798	AffineExpr dstUbResult = dstUbMap.getResult(idx: `0`);
799	if (!isa<AffineConstantExpr>(Val: srcLbResult) \|\|
800	!isa<AffineConstantExpr>(Val: srcUbResult) \|\|
801	!isa<AffineConstantExpr>(Val: dstLbResult) \|\|
802	!isa<AffineConstantExpr>(Val: dstUbResult))
803	return std::nullopt;
804
805	// Check if src and dst loop bounds are the same. If not, we can guarantee
806	// that the slice is not maximal.
807	if (srcLbResult != dstLbResult \|\| srcUbResult != dstUbResult \|\|
808	srcLoop.getStep() != dstLoop.getStep())
809	return false;
810	}
811
812	return true;
813	}
814
815	/// Returns true if it is deterministically verified that the original iteration
816	/// space of the slice is contained within the new iteration space that is
817	/// created after fusing 'this' slice into its destination.
818	std::optional<bool> ComputationSliceState::isSliceValid() const {
819	// Fast check to determine if the slice is valid. If the following conditions
820	// are verified to be true, slice is declared valid by the fast check:
821	// 1. Each slice loop is a single iteration loop bound in terms of a single
822	// destination loop IV.
823	// 2. Loop bounds of the destination loop IV (from above) and those of the
824	// source loop IV are exactly the same.
825	// If the fast check is inconclusive or false, we proceed with a more
826	// expensive analysis.
827	// TODO: Store the result of the fast check, as it might be used again in
828	// `canRemoveSrcNodeAfterFusion`.
829	std::optional<bool> isValidFastCheck = isSliceMaximalFastCheck();
830	if (isValidFastCheck && *isValidFastCheck)
831	return true;
832
833	// Create constraints for the source loop nest using which slice is computed.
834	FlatAffineValueConstraints srcConstraints;
835	// TODO: Store the source's domain to avoid computation at each depth.
836	if (failed(result: getSourceAsConstraints(cst&: srcConstraints))) {
837	LLVM_DEBUG(llvm::dbgs() << "Unable to compute source's domain\n");
838	return std::nullopt;
839	}
840	// As the set difference utility currently cannot handle symbols in its
841	// operands, validity of the slice cannot be determined.
842	if (srcConstraints.getNumSymbolVars() > `0`) {
843	LLVM_DEBUG(llvm::dbgs() << "Cannot handle symbols in source domain\n");
844	return std::nullopt;
845	}
846	// TODO: Handle local vars in the source domains while using the 'projectOut'
847	// utility below. Currently, aligning is not done assuming that there will be
848	// no local vars in the source domain.
849	if (srcConstraints.getNumLocalVars() != `0`) {
850	LLVM_DEBUG(llvm::dbgs() << "Cannot handle locals in source domain\n");
851	return std::nullopt;
852	}
853
854	// Create constraints for the slice loop nest that would be created if the
855	// fusion succeeds.
856	FlatAffineValueConstraints sliceConstraints;
857	if (failed(result: getAsConstraints(cst: &sliceConstraints))) {
858	LLVM_DEBUG(llvm::dbgs() << "Unable to compute slice's domain\n");
859	return std::nullopt;
860	}
861
862	// Projecting out every dimension other than the 'ivs' to express slice's
863	// domain completely in terms of source's IVs.
864	sliceConstraints.projectOut(pos: ivs.size(),
865	num: sliceConstraints.getNumVars() - ivs.size());
866
867	LLVM_DEBUG(llvm::dbgs() << "Domain of the source of the slice:\n");
868	LLVM_DEBUG(srcConstraints.dump());
869	LLVM_DEBUG(llvm::dbgs() << "Domain of the slice if this fusion succeeds "
870	"(expressed in terms of its source's IVs):\n");
871	LLVM_DEBUG(sliceConstraints.dump());
872
873	// TODO: Store 'srcSet' to avoid recalculating for each depth.
874	PresburgerSet srcSet(srcConstraints);
875	PresburgerSet sliceSet(sliceConstraints);
876	PresburgerSet diffSet = sliceSet.subtract(set: srcSet);
877
878	if (!diffSet.isIntegerEmpty()) {
879	LLVM_DEBUG(llvm::dbgs() << "Incorrect slice\n");
880	return false;
881	}
882	return true;
883	}
884
885	/// Returns true if the computation slice encloses all the iterations of the
886	/// sliced loop nest. Returns false if it does not. Returns std::nullopt if it
887	/// cannot determine if the slice is maximal or not.
888	std::optional<bool> ComputationSliceState::isMaximal() const {
889	// Fast check to determine if the computation slice is maximal. If the result
890	// is inconclusive, we proceed with a more expensive analysis.
891	std::optional<bool> isMaximalFastCheck = isSliceMaximalFastCheck();
892	if (isMaximalFastCheck)
893	return isMaximalFastCheck;
894
895	// Create constraints for the src loop nest being sliced.
896	FlatAffineValueConstraints srcConstraints(/numDims=/ivs.size(),
897	/numSymbols=/`0`,
898	/numLocals=/`0`, ivs);
899	for (Value iv : ivs) {
900	AffineForOp loop = getForInductionVarOwner(iv);
901	assert(loop && "Expected affine for");
902	if (failed(srcConstraints.addAffineForOpDomain(forOp: loop)))
903	return std::nullopt;
904	}
905
906	// Create constraints for the slice using the dst loop nest information. We
907	// retrieve existing dst loops from the lbOperands.
908	SmallVector<Value> consumerIVs;
909	for (Value lbOp : lbOperands [`0`])
910	if (getForInductionVarOwner(lbOp))
911	consumerIVs.push_back(Elt: lbOp);
912
913	// Add empty IV Values for those new loops that are not equalities and,
914	// therefore, are not yet materialized in the IR.
915	for (int i = consumerIVs.size(), end = ivs.size(); i < end; ++i)
916	consumerIVs.push_back(Elt: Value ());
917
918	FlatAffineValueConstraints sliceConstraints(/numDims=/consumerIVs.size(),
919	/numSymbols=/`0`,
920	/numLocals=/`0`, consumerIVs);
921
922	if (failed(result: sliceConstraints.addDomainFromSliceMaps(lbMaps: lbs, ubMaps: ubs, operands: lbOperands [`0`])))
923	return std::nullopt;
924
925	if (srcConstraints.getNumDimVars() != sliceConstraints.getNumDimVars())
926	// Constraint dims are different. The integer set difference can't be
927	// computed so we don't know if the slice is maximal.
928	return std::nullopt;
929
930	// Compute the difference between the src loop nest and the slice integer
931	// sets.
932	PresburgerSet srcSet(srcConstraints);
933	PresburgerSet sliceSet(sliceConstraints);
934	PresburgerSet diffSet = srcSet.subtract(set: sliceSet);
935	return diffSet.isIntegerEmpty();
936	}
937
938	unsigned MemRefRegion::getRank() const {
939	return cast<MemRefType>(memref.getType()).getRank();
940	}
941
942	std::optional<int64_t> MemRefRegion::getConstantBoundingSizeAndShape(
943	SmallVectorImpl<int64_t> shape, std::vector<SmallVector<int64_t, `4`>> lbs,
944	SmallVectorImpl<int64_t> lbDivisors) const* {
945	auto memRefType = cast<MemRefType>(memref.getType());
946	unsigned rank = memRefType.getRank();
947	if (shape)
948	shape->reserve(N: rank);
949
950	assert(rank == cst.getNumDimVars() && "inconsistent memref region");
951
952	// Use a copy of the region constraints that has upper/lower bounds for each
953	// memref dimension with static size added to guard against potential
954	// over-approximation from projection or union bounding box. We may not add
955	// this on the region itself since they might just be redundant constraints
956	// that will need non-trivials means to eliminate.
957	FlatAffineValueConstraints cstWithShapeBounds(cst);
958	for (unsigned r = `0`; r < rank; r++) {
959	cstWithShapeBounds.addBound(type: BoundType::LB, pos: r, value: `0`);
960	int64_t dimSize = memRefType.getDimSize(r);
961	if (ShapedType::isDynamic(dimSize))
962	continue;
963	cstWithShapeBounds.addBound(type: BoundType::UB, pos: r, value: dimSize - `1`);
964	}
965
966	// Find a constant upper bound on the extent of this memref region along each
967	// dimension.
968	int64_t numElements = `1`;
969	int64_t diffConstant;
970	int64_t lbDivisor;
971	for (unsigned d = `0`; d < rank; d++) {
972	SmallVector<int64_t, `4`> lb;
973	std::optional<int64_t> diff =
974	cstWithShapeBounds.getConstantBoundOnDimSize64(pos: d, lb: &lb, boundFloorDivisor: &lbDivisor);
975	if (diff.has_value()) {
976	diffConstant = *diff;
977	assert(diffConstant >= `0` && "Dim size bound can't be negative");
978	assert(lbDivisor > `0`);
979	} else {
980	// If no constant bound is found, then it can always be bound by the
981	// memref's dim size if the latter has a constant size along this dim.
982	auto dimSize = memRefType.getDimSize(d);
983	if (dimSize == ShapedType::kDynamic)
984	return std::nullopt;
985	diffConstant = dimSize;
986	// Lower bound becomes 0.
987	lb.resize(N: cstWithShapeBounds.getNumSymbolVars() + `1`, NV: `0`);
988	lbDivisor = `1`;
989	}
990	numElements *= diffConstant;
991	if (lbs) {
992	lbs->push_back(x: lb);
993	assert(lbDivisors && "both lbs and lbDivisor or none");
994	lbDivisors->push_back(Elt: lbDivisor);
995	}
996	if (shape) {
997	shape->push_back(Elt: diffConstant);
998	}
999	}
1000	return numElements;
1001	}
1002
1003	void MemRefRegion::getLowerAndUpperBound(unsigned pos, AffineMap &lbMap,
1004	AffineMap &ubMap) const {
1005	assert(pos < cst.getNumDimVars() && "invalid position");
1006	auto memRefType = cast<MemRefType>(memref.getType());
1007	unsigned rank = memRefType.getRank();
1008
1009	assert(rank == cst.getNumDimVars() && "inconsistent memref region");
1010
1011	auto boundPairs = cst.getLowerAndUpperBound(
1012	pos, /offset=/`0`, /num=/rank, symStartPos: cst.getNumDimAndSymbolVars(),
1013	/localExprs=/{}, context: memRefType.getContext());
1014	lbMap = boundPairs.first;
1015	ubMap = boundPairs.second;
1016	assert(lbMap && "lower bound for a region must exist");
1017	assert(ubMap && "upper bound for a region must exist");
1018	assert(lbMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
1019	assert(ubMap.getNumInputs() == cst.getNumDimAndSymbolVars() - rank);
1020	}
1021
1022	LogicalResult MemRefRegion::unionBoundingBox(const MemRefRegion &other) {
1023	assert(memref == other.memref);
1024	return cst.unionBoundingBox(other: *other.getConstraints());
1025	}
1026
1027	/// Computes the memory region accessed by this memref with the region
1028	/// represented as constraints symbolic/parametric in 'loopDepth' loops
1029	/// surrounding opInst and any additional Function symbols.
1030	// For example, the memref region for this load operation at loopDepth = 1 will
1031	// be as below:
1032	//
1033	// affine.for %i = 0 to 32 {
1034	// affine.for %ii = %i to (d0) -> (d0 + 8) (%i) {
1035	// load %A[%ii]
1036	// }
1037	// }
1038	//
1039	// region: {memref = %A, write = false, {%i <= m0 <= %i + 7} }
1040	// The last field is a 2-d FlatAffineValueConstraints symbolic in %i.
1041	//
1042	// TODO: extend this to any other memref dereferencing ops
1043	// (dma_start, dma_wait).
1044	LogicalResult MemRefRegion::compute(Operation op, unsigned* loopDepth,
1045	const ComputationSliceState *sliceState,
1046	bool addMemRefDimBounds) {
1047	assert((isa<AffineReadOpInterface, AffineWriteOpInterface>(op)) &&
1048	"affine read/write op expected");
1049
1050	MemRefAccess access(op);
1051	memref = access.memref;
1052	write = access.isStore();
1053
1054	unsigned rank = access.getRank();
1055
1056	LLVM_DEBUG(llvm::dbgs() << "MemRefRegion::compute: " << *op
1057	<< "\ndepth: " << loopDepth << "\n";);
1058
1059	// 0-d memrefs.
1060	if (rank == `0`) {
1061	SmallVector<Value, `4`> ivs;
1062	getAffineIVs(op&: *op, ivs);
1063	assert(loopDepth <= ivs.size() && "invalid 'loopDepth'");
1064	// The first 'loopDepth' IVs are symbols for this region.
1065	ivs.resize(N: loopDepth);
1066	// A 0-d memref has a 0-d region.
1067	cst = FlatAffineValueConstraints (rank, loopDepth, /numLocals=/`0`, ivs);
1068	return success();
1069	}
1070
1071	// Build the constraints for this region.
1072	AffineValueMap accessValueMap;
1073	access.getAccessMap(accessMap: &accessValueMap);
1074	AffineMap accessMap = accessValueMap.getAffineMap();
1075
1076	unsigned numDims = accessMap.getNumDims();
1077	unsigned numSymbols = accessMap.getNumSymbols();
1078	unsigned numOperands = accessValueMap.getNumOperands();
1079	// Merge operands with slice operands.
1080	SmallVector<Value, `4`> operands;
1081	operands.resize(N: numOperands);
1082	for (unsigned i = `0`; i < numOperands; ++i)
1083	operands [i] = accessValueMap.getOperand(i);
1084
1085	if (sliceState != nullptr) {
1086	operands.reserve(N: operands.size() + sliceState->lbOperands [`0`].size());
1087	// Append slice operands to 'operands' as symbols.
1088	for (auto extraOperand : sliceState->lbOperands [`0`]) {
1089	if (!llvm::is_contained(Range&: operands, Element: extraOperand)) {
1090	operands.push_back(Elt: extraOperand);
1091	numSymbols++;
1092	}
1093	}
1094	}
1095	// We'll first associate the dims and symbols of the access map to the dims
1096	// and symbols resp. of cst. This will change below once cst is
1097	// fully constructed out.
1098	cst = FlatAffineValueConstraints (numDims, numSymbols, `0`, operands);
1099
1100	// Add equality constraints.
1101	// Add inequalities for loop lower/upper bounds.
1102	for (unsigned i = `0`; i < numDims + numSymbols; ++i) {
1103	auto operand = operands [i];
1104	if (auto affineFor = getForInductionVarOwner(operand)) {
1105	// Note that cst can now have more dimensions than accessMap if the
1106	// bounds expressions involve outer loops or other symbols.
1107	// TODO: rewrite this to use getInstIndexSet; this way
1108	// conditionals will be handled when the latter supports it.
1109	if (failed(cst.addAffineForOpDomain(forOp: affineFor)))
1110	return failure();
1111	} else if (auto parallelOp = getAffineParallelInductionVarOwner(operand)) {
1112	if (failed(cst.addAffineParallelOpDomain(parallelOp: parallelOp)))
1113	return failure();
1114	} else if (isValidSymbol(value: operand)) {
1115	// Check if the symbol is a constant.
1116	Value symbol = operand;
1117	if (auto constVal = getConstantIntValue(ofr: symbol))
1118	cst.addBound(type: BoundType::EQ, val: symbol, value: constVal.value());
1119	} else {
1120	LLVM_DEBUG(llvm::dbgs() << "unknown affine dimensional value");
1121	return failure();
1122	}
1123	}
1124
1125	// Add lower/upper bounds on loop IVs using bounds from 'sliceState'.
1126	if (sliceState != nullptr) {
1127	// Add dim and symbol slice operands.
1128	for (auto operand : sliceState->lbOperands [`0`]) {
1129	cst.addInductionVarOrTerminalSymbol(val: operand);
1130	}
1131	// Add upper/lower bounds from 'sliceState' to 'cst'.
1132	LogicalResult ret =
1133	cst.addSliceBounds(values: sliceState->ivs, lbMaps: sliceState->lbs, ubMaps: sliceState->ubs,
1134	operands: sliceState->lbOperands [`0`]);
1135	assert(succeeded(ret) &&
1136	"should not fail as we never have semi-affine slice maps");
1137	(void)ret;
1138	}
1139
1140	// Add access function equalities to connect loop IVs to data dimensions.
1141	if (failed(result: cst.composeMap(vMap: &accessValueMap))) {
1142	op->emitError(message: "getMemRefRegion: compose affine map failed");
1143	LLVM_DEBUG(accessValueMap.getAffineMap().dump());
1144	return failure();
1145	}
1146
1147	// Set all variables appearing after the first 'rank' variables as
1148	// symbolic variables - so that the ones corresponding to the memref
1149	// dimensions are the dimensional variables for the memref region.
1150	cst.setDimSymbolSeparation(cst.getNumDimAndSymbolVars() - rank);
1151
1152	// Eliminate any loop IVs other than the outermost 'loopDepth' IVs, on which
1153	// this memref region is symbolic.
1154	SmallVector<Value, `4`> enclosingIVs;
1155	getAffineIVs(op&: *op, ivs&: enclosingIVs);
1156	assert(loopDepth <= enclosingIVs.size() && "invalid loop depth");
1157	enclosingIVs.resize(N: loopDepth);
1158	SmallVector<Value, `4`> vars;
1159	cst.getValues(start: cst.getNumDimVars(), end: cst.getNumDimAndSymbolVars(), values: &vars);
1160	for (Value var : vars) {
1161	if ((isAffineInductionVar(val: var)) && !llvm::is_contained(Range&: enclosingIVs, Element: var)) {
1162	cst.projectOut(val: var);
1163	}
1164	}
1165
1166	// Project out any local variables (these would have been added for any
1167	// mod/divs).
1168	cst.projectOut(pos: cst.getNumDimAndSymbolVars(), num: cst.getNumLocalVars());
1169
1170	// Constant fold any symbolic variables.
1171	cst.constantFoldVarRange(/pos=/cst.getNumDimVars(),
1172	/num=/cst.getNumSymbolVars());
1173
1174	assert(cst.getNumDimVars() == rank && "unexpected MemRefRegion format");
1175
1176	// Add upper/lower bounds for each memref dimension with static size
1177	// to guard against potential over-approximation from projection.
1178	// TODO: Support dynamic memref dimensions.
1179	if (addMemRefDimBounds) {
1180	auto memRefType = cast<MemRefType>(memref.getType());
1181	for (unsigned r = `0`; r < rank; r++) {
1182	cst.addBound(type: BoundType::LB, /pos=/r, /value=/`0`);
1183	if (memRefType.isDynamicDim(r))
1184	continue;
1185	cst.addBound(BoundType::UB, /pos=/r, memRefType.getDimSize(r) - `1`);
1186	}
1187	}
1188	cst.removeTrivialRedundancy();
1189
1190	LLVM_DEBUG(llvm::dbgs() << "Memory region:\n");
1191	LLVM_DEBUG(cst.dump());
1192	return success();
1193	}
1194
1195	std::optional<int64_t>
1196	mlir::affine::getMemRefIntOrFloatEltSizeInBytes(MemRefType memRefType) {
1197	auto elementType = memRefType.getElementType();
1198
1199	unsigned sizeInBits;
1200	if (elementType.isIntOrFloat()) {
1201	sizeInBits = elementType.getIntOrFloatBitWidth();
1202	} else if (auto vectorType = dyn_cast<VectorType>(elementType)) {
1203	if (vectorType.getElementType().isIntOrFloat())
1204	sizeInBits =
1205	vectorType.getElementTypeBitWidth() * vectorType.getNumElements();
1206	else
1207	return std::nullopt;
1208	} else {
1209	return std::nullopt;
1210	}
1211	return llvm::divideCeil(Numerator: sizeInBits, Denominator: `8`);
1212	}
1213
1214	// Returns the size of the region.
1215	std::optional<int64_t> MemRefRegion::getRegionSize() {
1216	auto memRefType = cast<MemRefType>(memref.getType());
1217
1218	if (!memRefType.getLayout().isIdentity()) {
1219	LLVM_DEBUG(llvm::dbgs() << "Non-identity layout map not yet supported\n");
1220	return false;
1221	}
1222
1223	// Indices to use for the DmaStart op.
1224	// Indices for the original memref being DMAed from/to.
1225	SmallVector<Value, `4`> memIndices;
1226	// Indices for the faster buffer being DMAed into/from.
1227	SmallVector<Value, `4`> bufIndices;
1228
1229	// Compute the extents of the buffer.
1230	std::optional<int64_t> numElements = getConstantBoundingSizeAndShape();
1231	if (!numElements) {
1232	LLVM_DEBUG(llvm::dbgs() << "Dynamic shapes not yet supported\n");
1233	return std::nullopt;
1234	}
1235	auto eltSize = getMemRefIntOrFloatEltSizeInBytes(memRefType);
1236	if (!eltSize)
1237	return std::nullopt;
1238	return eltSize *numElements;
1239	}
1240
1241	/// Returns the size of memref data in bytes if it's statically shaped,
1242	/// std::nullopt otherwise. If the element of the memref has vector type, takes
1243	/// into account size of the vector as well.
1244	// TODO: improve/complete this when we have target data.
1245	std::optional<uint64_t>
1246	mlir::affine::getIntOrFloatMemRefSizeInBytes(MemRefType memRefType) {
1247	if (!memRefType.hasStaticShape())
1248	return std::nullopt;
1249	auto elementType = memRefType.getElementType();
1250	if (!elementType.isIntOrFloat() && !isa<VectorType>(elementType))
1251	return std::nullopt;
1252
1253	auto sizeInBytes = getMemRefIntOrFloatEltSizeInBytes(memRefType);
1254	if (!sizeInBytes)
1255	return std::nullopt;
1256	for (unsigned i = `0`, e = memRefType.getRank(); i < e; i++) {
1257	sizeInBytes = sizeInBytes memRefType.getDimSize(i);
1258	}
1259	return sizeInBytes;
1260	}
1261
1262	template <typename LoadOrStoreOp>
1263	LogicalResult mlir::affine::boundCheckLoadOrStoreOp(LoadOrStoreOp loadOrStoreOp,
1264	bool emitError) {
1265	static_assert(llvm::is_one_of<LoadOrStoreOp, AffineReadOpInterface,
1266	AffineWriteOpInterface>::value,
1267	"argument should be either a AffineReadOpInterface or a "
1268	"AffineWriteOpInterface");
1269
1270	Operation *op = loadOrStoreOp.getOperation();
1271	MemRefRegion region(op->getLoc());
1272	if (failed(result: region.compute(op, /loopDepth=/`0`, /sliceState=/nullptr,
1273	/addMemRefDimBounds=/false)))
1274	return success();
1275
1276	LLVM_DEBUG(llvm::dbgs() << "Memory region");
1277	LLVM_DEBUG(region.getConstraints()->dump());
1278
1279	bool outOfBounds = false;
1280	unsigned rank = loadOrStoreOp.getMemRefType().getRank();
1281
1282	// For each dimension, check for out of bounds.
1283	for (unsigned r = `0`; r < rank; r++) {
1284	FlatAffineValueConstraints ucst(*region.getConstraints());
1285
1286	// Intersect memory region with constraint capturing out of bounds (both out
1287	// of upper and out of lower), and check if the constraint system is
1288	// feasible. If it is, there is at least one point out of bounds.
1289	SmallVector<int64_t, `4`> ineq(rank + `1`, `0`);
1290	int64_t dimSize = loadOrStoreOp.getMemRefType().getDimSize(r);
1291	// TODO: handle dynamic dim sizes.
1292	if (dimSize == -`1`)
1293	continue;
1294
1295	// Check for overflow: d_i >= memref dim size.
1296	ucst.addBound(type: BoundType::LB, pos: r, value: dimSize);
1297	outOfBounds = !ucst.isEmpty();
1298	if (outOfBounds && emitError) {
1299	loadOrStoreOp.emitOpError()
1300	<< "memref out of upper bound access along dimension #" << (r + `1`);
1301	}
1302
1303	// Check for a negative index.
1304	FlatAffineValueConstraints lcst(*region.getConstraints());
1305	std::fill(first: ineq.begin(), last: ineq.end(), value: `0`);
1306	// d_i <= -1;
1307	lcst.addBound(type: BoundType::UB, pos: r, value: -`1`);
1308	outOfBounds = !lcst.isEmpty();
1309	if (outOfBounds && emitError) {
1310	loadOrStoreOp.emitOpError()
1311	<< "memref out of lower bound access along dimension #" << (r + `1`);
1312	}
1313	}
1314	return failure(isFailure: outOfBounds);
1315	}
1316
1317	// Explicitly instantiate the template so that the compiler knows we need them!
1318	template LogicalResult
1319	mlir::affine::boundCheckLoadOrStoreOp(AffineReadOpInterface loadOp,
1320	bool emitError);
1321	template LogicalResult
1322	mlir::affine::boundCheckLoadOrStoreOp(AffineWriteOpInterface storeOp,
1323	bool emitError);
1324
1325	// Returns in 'positions' the Block positions of 'op' in each ancestor
1326	// Block from the Block containing operation, stopping at 'limitBlock'.
1327	static void findInstPosition(Operation op, Block limitBlock,
1328	SmallVectorImpl<unsigned> *positions) {
1329	Block *block = op->getBlock();
1330	while (block != limitBlock) {
1331	// FIXME: This algorithm is unnecessarily O(n) and should be improved to not
1332	// rely on linear scans.
1333	int instPosInBlock = std::distance(block->begin(), op->getIterator());
1334	positions->push_back(Elt: instPosInBlock);
1335	op = block->getParentOp();
1336	block = op->getBlock();
1337	}
1338	std::reverse(first: positions->begin(), last: positions->end());
1339	}
1340
1341	// Returns the Operation in a possibly nested set of Blocks, where the
1342	// position of the operation is represented by 'positions', which has a
1343	// Block position for each level of nesting.
1344	static Operation getInstAtPosition(ArrayRef<unsigned*> positions,
1345	unsigned level, Block *block) {
1346	unsigned i = `0`;
1347	for (auto &op : *block) {
1348	if (i != positions [level]) {
1349	++i;
1350	continue;
1351	}
1352	if (level == positions.size() - `1`)
1353	return &op;
1354	if (auto childAffineForOp = dyn_cast<AffineForOp>(op))
1355	return getInstAtPosition(positions, level + `1`,
1356	childAffineForOp.getBody());
1357
1358	for (auto &region : op.getRegions()) {
1359	for (auto &b : region)
1360	if (auto *ret = getInstAtPosition(positions, level: level + `1`, block: &b))
1361	return ret;
1362	}
1363	return nullptr;
1364	}
1365	return nullptr;
1366	}
1367
1368	// Adds loop IV bounds to 'cst' for loop IVs not found in 'ivs'.
1369	static LogicalResult addMissingLoopIVBounds(SmallPtrSet<Value, `8`> &ivs,
1370	FlatAffineValueConstraints *cst) {
1371	for (unsigned i = `0`, e = cst->getNumDimVars(); i < e; ++i) {
1372	auto value = cst->getValue(pos: i);
1373	if (ivs.count(Ptr: value) == `0`) {
1374	assert(isAffineForInductionVar(value));
1375	auto loop = getForInductionVarOwner(value);
1376	if (failed(cst->addAffineForOpDomain(forOp: loop)))
1377	return failure();
1378	}
1379	}
1380	return success();
1381	}
1382
1383	/// Returns the innermost common loop depth for the set of operations in 'ops'.
1384	// TODO: Move this to LoopUtils.
1385	unsigned mlir::affine::getInnermostCommonLoopDepth(
1386	ArrayRef<Operation > ops, SmallVectorImpl<AffineForOp> surroundingLoops) {
1387	unsigned numOps = ops.size();
1388	assert(numOps > `0` && "Expected at least one operation");
1389
1390	std::vector<SmallVector<AffineForOp, `4`>> loops(numOps);
1391	unsigned loopDepthLimit = std::numeric_limits<unsigned>::max();
1392	for (unsigned i = `0`; i < numOps; ++i) {
1393	getAffineForIVs(*ops [i], &loops [i]);
1394	loopDepthLimit =
1395	std::min(a: loopDepthLimit, b: static_cast<unsigned>(loops [i].size()));
1396	}
1397
1398	unsigned loopDepth = `0`;
1399	for (unsigned d = `0`; d < loopDepthLimit; ++d) {
1400	unsigned i;
1401	for (i = `1`; i < numOps; ++i) {
1402	if (loops[i - `1`][d] != loops[i][d])
1403	return loopDepth;
1404	}
1405	if (surroundingLoops)
1406	surroundingLoops->push_back(loops[i - `1`][d]);
1407	++loopDepth;
1408	}
1409	return loopDepth;
1410	}
1411
1412	/// Computes in 'sliceUnion' the union of all slice bounds computed at
1413	/// 'loopDepth' between all dependent pairs of ops in 'opsA' and 'opsB', and
1414	/// then verifies if it is valid. Returns 'SliceComputationResult::Success' if
1415	/// union was computed correctly, an appropriate failure otherwise.
1416	SliceComputationResult
1417	mlir::affine::computeSliceUnion(ArrayRef<Operation *> opsA,
1418	ArrayRef<Operation > opsB, unsigned* loopDepth,
1419	unsigned numCommonLoops, bool isBackwardSlice,
1420	ComputationSliceState *sliceUnion) {
1421	// Compute the union of slice bounds between all pairs in 'opsA' and
1422	// 'opsB' in 'sliceUnionCst'.
1423	FlatAffineValueConstraints sliceUnionCst;
1424	assert(sliceUnionCst.getNumDimAndSymbolVars() == `0`);
1425	std::vector<std::pair<Operation , Operation >> dependentOpPairs;
1426	for (auto *i : opsA) {
1427	MemRefAccess srcAccess(i);
1428	for (auto *j : opsB) {
1429	MemRefAccess dstAccess(j);
1430	if (srcAccess.memref != dstAccess.memref)
1431	continue;
1432	// Check if 'loopDepth' exceeds nesting depth of src/dst ops.
1433	if ((!isBackwardSlice && loopDepth > getNestingDepth(op: i)) \|\|
1434	(isBackwardSlice && loopDepth > getNestingDepth(op: j))) {
1435	LLVM_DEBUG(llvm::dbgs() << "Invalid loop depth\n");
1436	return SliceComputationResult::GenericFailure;
1437	}
1438
1439	bool readReadAccesses = isa<AffineReadOpInterface>(srcAccess.opInst) &&
1440	isa<AffineReadOpInterface>(dstAccess.opInst);
1441	FlatAffineValueConstraints dependenceConstraints;
1442	// Check dependence between 'srcAccess' and 'dstAccess'.
1443	DependenceResult result = checkMemrefAccessDependence(
1444	srcAccess, dstAccess, /loopDepth=/numCommonLoops + `1`,
1445	dependenceConstraints: &dependenceConstraints, /dependenceComponents=/nullptr,
1446	/allowRAR=/readReadAccesses);
1447	if (result.value == DependenceResult::Failure) {
1448	LLVM_DEBUG(llvm::dbgs() << "Dependence check failed\n");
1449	return SliceComputationResult::GenericFailure;
1450	}
1451	if (result.value == DependenceResult::NoDependence)
1452	continue;
1453	dependentOpPairs.emplace_back(args&: i, args&: j);
1454
1455	// Compute slice bounds for 'srcAccess' and 'dstAccess'.
1456	ComputationSliceState tmpSliceState;
1457	mlir::affine::getComputationSliceState(depSourceOp: i, depSinkOp: j, dependenceConstraints: &dependenceConstraints,
1458	loopDepth, isBackwardSlice,
1459	sliceState: &tmpSliceState);
1460
1461	if (sliceUnionCst.getNumDimAndSymbolVars() == `0`) {
1462	// Initialize 'sliceUnionCst' with the bounds computed in previous step.
1463	if (failed(result: tmpSliceState.getAsConstraints(cst: &sliceUnionCst))) {
1464	LLVM_DEBUG(llvm::dbgs()
1465	<< "Unable to compute slice bound constraints\n");
1466	return SliceComputationResult::GenericFailure;
1467	}
1468	assert(sliceUnionCst.getNumDimAndSymbolVars() > `0`);
1469	continue;
1470	}
1471
1472	// Compute constraints for 'tmpSliceState' in 'tmpSliceCst'.
1473	FlatAffineValueConstraints tmpSliceCst;
1474	if (failed(result: tmpSliceState.getAsConstraints(cst: &tmpSliceCst))) {
1475	LLVM_DEBUG(llvm::dbgs()
1476	<< "Unable to compute slice bound constraints\n");
1477	return SliceComputationResult::GenericFailure;
1478	}
1479
1480	// Align coordinate spaces of 'sliceUnionCst' and 'tmpSliceCst' if needed.
1481	if (!sliceUnionCst.areVarsAlignedWithOther(other: tmpSliceCst)) {
1482
1483	// Pre-constraint var alignment: record loop IVs used in each constraint
1484	// system.
1485	SmallPtrSet<Value, `8`> sliceUnionIVs;
1486	for (unsigned k = `0`, l = sliceUnionCst.getNumDimVars(); k < l; ++k)
1487	sliceUnionIVs.insert(Ptr: sliceUnionCst.getValue(pos: k));
1488	SmallPtrSet<Value, `8`> tmpSliceIVs;
1489	for (unsigned k = `0`, l = tmpSliceCst.getNumDimVars(); k < l; ++k)
1490	tmpSliceIVs.insert(Ptr: tmpSliceCst.getValue(pos: k));
1491
1492	sliceUnionCst.mergeAndAlignVarsWithOther(/offset=/`0`, other: &tmpSliceCst);
1493
1494	// Post-constraint var alignment: add loop IV bounds missing after
1495	// var alignment to constraint systems. This can occur if one constraint
1496	// system uses an loop IV that is not used by the other. The call
1497	// to unionBoundingBox below expects constraints for each Loop IV, even
1498	// if they are the unsliced full loop bounds added here.
1499	if (failed(result: addMissingLoopIVBounds(ivs&: sliceUnionIVs, cst: &sliceUnionCst)))
1500	return SliceComputationResult::GenericFailure;
1501	if (failed(result: addMissingLoopIVBounds(ivs&: tmpSliceIVs, cst: &tmpSliceCst)))
1502	return SliceComputationResult::GenericFailure;
1503	}
1504	// Compute union bounding box of 'sliceUnionCst' and 'tmpSliceCst'.
1505	if (sliceUnionCst.getNumLocalVars() > `0` \|\|
1506	tmpSliceCst.getNumLocalVars() > `0` \|\|
1507	failed(result: sliceUnionCst.unionBoundingBox(other: tmpSliceCst))) {
1508	LLVM_DEBUG(llvm::dbgs()
1509	<< "Unable to compute union bounding box of slice bounds\n");
1510	return SliceComputationResult::GenericFailure;
1511	}
1512	}
1513	}
1514
1515	// Empty union.
1516	if (sliceUnionCst.getNumDimAndSymbolVars() == `0`)
1517	return SliceComputationResult::GenericFailure;
1518
1519	// Gather loops surrounding ops from loop nest where slice will be inserted.
1520	SmallVector<Operation *, `4`> ops;
1521	for (auto &dep : dependentOpPairs) {
1522	ops.push_back(Elt: isBackwardSlice ? dep.second : dep.first);
1523	}
1524	SmallVector<AffineForOp, `4`> surroundingLoops;
1525	unsigned innermostCommonLoopDepth =
1526	getInnermostCommonLoopDepth(ops, &surroundingLoops);
1527	if (loopDepth > innermostCommonLoopDepth) {
1528	LLVM_DEBUG(llvm::dbgs() << "Exceeds max loop depth\n");
1529	return SliceComputationResult::GenericFailure;
1530	}
1531
1532	// Store 'numSliceLoopIVs' before converting dst loop IVs to dims.
1533	unsigned numSliceLoopIVs = sliceUnionCst.getNumDimVars();
1534
1535	// Convert any dst loop IVs which are symbol variables to dim variables.
1536	sliceUnionCst.convertLoopIVSymbolsToDims();
1537	sliceUnion->clearBounds();
1538	sliceUnion->lbs.resize(N: numSliceLoopIVs, NV: AffineMap ());
1539	sliceUnion->ubs.resize(N: numSliceLoopIVs, NV: AffineMap ());
1540
1541	// Get slice bounds from slice union constraints 'sliceUnionCst'.
1542	sliceUnionCst.getSliceBounds(/offset=/`0`, num: numSliceLoopIVs,
1543	context: opsA [`0`]->getContext(), lbMaps: &sliceUnion->lbs,
1544	ubMaps: &sliceUnion->ubs);
1545
1546	// Add slice bound operands of union.
1547	SmallVector<Value, `4`> sliceBoundOperands;
1548	sliceUnionCst.getValues(start: numSliceLoopIVs,
1549	end: sliceUnionCst.getNumDimAndSymbolVars(),
1550	values: &sliceBoundOperands);
1551
1552	// Copy src loop IVs from 'sliceUnionCst' to 'sliceUnion'.
1553	sliceUnion->ivs.clear();
1554	sliceUnionCst.getValues(start: `0`, end: numSliceLoopIVs, values: &sliceUnion->ivs);
1555
1556	// Set loop nest insertion point to block start at 'loopDepth'.
1557	sliceUnion->insertPoint =
1558	isBackwardSlice
1559	? surroundingLoops[loopDepth - `1`].getBody()->begin()
1560	: std::prev(surroundingLoops[loopDepth - `1`].getBody()->end());
1561
1562	// Give each bound its own copy of 'sliceBoundOperands' for subsequent
1563	// canonicalization.
1564	sliceUnion->lbOperands.resize(new_size: numSliceLoopIVs, x: sliceBoundOperands);
1565	sliceUnion->ubOperands.resize(new_size: numSliceLoopIVs, x: sliceBoundOperands);
1566
1567	// Check if the slice computed is valid. Return success only if it is verified
1568	// that the slice is valid, otherwise return appropriate failure status.
1569	std::optional<bool> isSliceValid = sliceUnion->isSliceValid();
1570	if (!isSliceValid) {
1571	LLVM_DEBUG(llvm::dbgs() << "Cannot determine if the slice is valid\n");
1572	return SliceComputationResult::GenericFailure;
1573	}
1574	if (!*isSliceValid)
1575	return SliceComputationResult::IncorrectSliceFailure;
1576
1577	return SliceComputationResult::Success;
1578	}
1579
1580	// TODO: extend this to handle multiple result maps.
1581	static std::optional<uint64_t> getConstDifference(AffineMap lbMap,
1582	AffineMap ubMap) {
1583	assert(lbMap.getNumResults() == `1` && "expected single result bound map");
1584	assert(ubMap.getNumResults() == `1` && "expected single result bound map");
1585	assert(lbMap.getNumDims() == ubMap.getNumDims());
1586	assert(lbMap.getNumSymbols() == ubMap.getNumSymbols());
1587	AffineExpr lbExpr(lbMap.getResult(idx: `0`));
1588	AffineExpr ubExpr(ubMap.getResult(idx: `0`));
1589	auto loopSpanExpr = simplifyAffineExpr(expr: ubExpr - lbExpr, numDims: lbMap.getNumDims(),
1590	numSymbols: lbMap.getNumSymbols());
1591	auto cExpr = dyn_cast<AffineConstantExpr>(Val&: loopSpanExpr);
1592	if (!cExpr)
1593	return std::nullopt;
1594	return cExpr.getValue();
1595	}
1596
1597	// Builds a map 'tripCountMap' from AffineForOp to constant trip count for loop
1598	// nest surrounding represented by slice loop bounds in 'slice'. Returns true
1599	// on success, false otherwise (if a non-constant trip count was encountered).
1600	// TODO: Make this work with non-unit step loops.
1601	bool mlir::affine::buildSliceTripCountMap(
1602	const ComputationSliceState &slice,
1603	llvm::SmallDenseMap<Operation , uint64_t, `8`> tripCountMap) {
1604	unsigned numSrcLoopIVs = slice.ivs.size();
1605	// Populate map from AffineForOp -> trip count
1606	for (unsigned i = `0`; i < numSrcLoopIVs; ++i) {
1607	AffineForOp forOp = getForInductionVarOwner(slice.ivs[i]);
1608	auto *op = forOp.getOperation();
1609	AffineMap lbMap = slice.lbs [i];
1610	AffineMap ubMap = slice.ubs [i];
1611	// If lower or upper bound maps are null or provide no results, it implies
1612	// that source loop was not at all sliced, and the entire loop will be a
1613	// part of the slice.
1614	if (!lbMap \|\| lbMap.getNumResults() == `0` \|\| !ubMap \|\|
1615	ubMap.getNumResults() == `0`) {
1616	// The iteration of src loop IV 'i' was not sliced. Use full loop bounds.
1617	if (forOp.hasConstantLowerBound() && forOp.hasConstantUpperBound()) {
1618	(*tripCountMap)[op] =
1619	forOp.getConstantUpperBound() - forOp.getConstantLowerBound();
1620	continue;
1621	}
1622	std::optional<uint64_t> maybeConstTripCount = getConstantTripCount(forOp);
1623	if (maybeConstTripCount.has_value()) {
1624	(tripCountMap)[op] = maybeConstTripCount;
1625	continue;
1626	}
1627	return false;
1628	}
1629	std::optional<uint64_t> tripCount = getConstDifference(lbMap, ubMap);
1630	// Slice bounds are created with a constant ub - lb difference.
1631	if (!tripCount.has_value())
1632	return false;
1633	(tripCountMap)[op] = tripCount;
1634	}
1635	return true;
1636	}
1637
1638	// Return the number of iterations in the given slice.
1639	uint64_t mlir::affine::getSliceIterationCount(
1640	const llvm::SmallDenseMap<Operation *, uint64_t, `8`> &sliceTripCountMap) {
1641	uint64_t iterCount = `1`;
1642	for (const auto &count : sliceTripCountMap) {
1643	iterCount *= count.second;
1644	}
1645	return iterCount;
1646	}
1647
1648	const char *const kSliceFusionBarrierAttrName = "slice_fusion_barrier";
1649	// Computes slice bounds by projecting out any loop IVs from
1650	// 'dependenceConstraints' at depth greater than 'loopDepth', and computes slice
1651	// bounds in 'sliceState' which represent the one loop nest's IVs in terms of
1652	// the other loop nest's IVs, symbols and constants (using 'isBackwardsSlice').
1653	void mlir::affine::getComputationSliceState(
1654	Operation depSourceOp, Operation depSinkOp,
1655	FlatAffineValueConstraints dependenceConstraints, unsigned* loopDepth,
1656	bool isBackwardSlice, ComputationSliceState *sliceState) {
1657	// Get loop nest surrounding src operation.
1658	SmallVector<AffineForOp, `4`> srcLoopIVs;
1659	getAffineForIVs(*depSourceOp, &srcLoopIVs);
1660	unsigned numSrcLoopIVs = srcLoopIVs.size();
1661
1662	// Get loop nest surrounding dst operation.
1663	SmallVector<AffineForOp, `4`> dstLoopIVs;
1664	getAffineForIVs(*depSinkOp, &dstLoopIVs);
1665	unsigned numDstLoopIVs = dstLoopIVs.size();
1666
1667	assert((!isBackwardSlice && loopDepth <= numSrcLoopIVs) \|\|
1668	(isBackwardSlice && loopDepth <= numDstLoopIVs));
1669
1670	// Project out dimensions other than those up to 'loopDepth'.
1671	unsigned pos = isBackwardSlice ? numSrcLoopIVs + loopDepth : loopDepth;
1672	unsigned num =
1673	isBackwardSlice ? numDstLoopIVs - loopDepth : numSrcLoopIVs - loopDepth;
1674	dependenceConstraints->projectOut(pos, num);
1675
1676	// Add slice loop IV values to 'sliceState'.
1677	unsigned offset = isBackwardSlice ? `0` : loopDepth;
1678	unsigned numSliceLoopIVs = isBackwardSlice ? numSrcLoopIVs : numDstLoopIVs;
1679	dependenceConstraints->getValues(start: offset, end: offset + numSliceLoopIVs,
1680	values: &sliceState->ivs);
1681
1682	// Set up lower/upper bound affine maps for the slice.
1683	sliceState->lbs.resize(N: numSliceLoopIVs, NV: AffineMap ());
1684	sliceState->ubs.resize(N: numSliceLoopIVs, NV: AffineMap ());
1685
1686	// Get bounds for slice IVs in terms of other IVs, symbols, and constants.
1687	dependenceConstraints->getSliceBounds(offset, num: numSliceLoopIVs,
1688	context: depSourceOp->getContext(),
1689	lbMaps: &sliceState->lbs, ubMaps: &sliceState->ubs);
1690
1691	// Set up bound operands for the slice's lower and upper bounds.
1692	SmallVector<Value, `4`> sliceBoundOperands;
1693	unsigned numDimsAndSymbols = dependenceConstraints->getNumDimAndSymbolVars();
1694	for (unsigned i = `0`; i < numDimsAndSymbols; ++i) {
1695	if (i < offset \|\| i >= offset + numSliceLoopIVs) {
1696	sliceBoundOperands.push_back(Elt: dependenceConstraints->getValue(pos: i));
1697	}
1698	}
1699
1700	// Give each bound its own copy of 'sliceBoundOperands' for subsequent
1701	// canonicalization.
1702	sliceState->lbOperands.resize(new_size: numSliceLoopIVs, x: sliceBoundOperands);
1703	sliceState->ubOperands.resize(new_size: numSliceLoopIVs, x: sliceBoundOperands);
1704
1705	// Set destination loop nest insertion point to block start at 'dstLoopDepth'.
1706	sliceState->insertPoint =
1707	isBackwardSlice ? dstLoopIVs[loopDepth - `1`].getBody()->begin()
1708	: std::prev(srcLoopIVs[loopDepth - `1`].getBody()->end());
1709
1710	llvm::SmallDenseSet<Value, `8`> sequentialLoops;
1711	if (isa<AffineReadOpInterface>(depSourceOp) &&
1712	isa<AffineReadOpInterface>(depSinkOp)) {
1713	// For read-read access pairs, clear any slice bounds on sequential loops.
1714	// Get sequential loops in loop nest rooted at 'srcLoopIVs[0]'.
1715	getSequentialLoops(isBackwardSlice ? srcLoopIVs[`0`] : dstLoopIVs[`0`],
1716	&sequentialLoops);
1717	}
1718	auto getSliceLoop = [&](unsigned i) {
1719	return isBackwardSlice ? srcLoopIVs[i] : dstLoopIVs[i];
1720	};
1721	auto isInnermostInsertion = [&]() {
1722	return (isBackwardSlice ? loopDepth >= srcLoopIVs.size()
1723	: loopDepth >= dstLoopIVs.size());
1724	};
1725	llvm::SmallDenseMap<Operation *, uint64_t, `8`> sliceTripCountMap;
1726	auto srcIsUnitSlice = [&]() {
1727	return (buildSliceTripCountMap(slice: *sliceState, tripCountMap: &sliceTripCountMap) &&
1728	(getSliceIterationCount(sliceTripCountMap) == `1`));
1729	};
1730	// Clear all sliced loop bounds beginning at the first sequential loop, or
1731	// first loop with a slice fusion barrier attribute..
1732
1733	for (unsigned i = `0`; i < numSliceLoopIVs; ++i) {
1734	Value iv = getSliceLoop (i).getInductionVar();
1735	if (sequentialLoops.count(V: iv) == `0` &&
1736	getSliceLoop (i)->getAttr(kSliceFusionBarrierAttrName) == nullptr)
1737	continue;
1738	// Skip reset of bounds of reduction loop inserted in the destination loop
1739	// that meets the following conditions:
1740	// 1. Slice is single trip count.
1741	// 2. Loop bounds of the source and destination match.
1742	// 3. Is being inserted at the innermost insertion point.
1743	std::optional<bool> isMaximal = sliceState->isMaximal();
1744	if (isLoopParallelAndContainsReduction(getSliceLoop (i)) &&
1745	isInnermostInsertion() && srcIsUnitSlice () && isMaximal && *isMaximal)
1746	continue;
1747	for (unsigned j = i; j < numSliceLoopIVs; ++j) {
1748	sliceState->lbs [j] = AffineMap ();
1749	sliceState->ubs [j] = AffineMap ();
1750	}
1751	break;
1752	}
1753	}
1754
1755	/// Creates a computation slice of the loop nest surrounding 'srcOpInst',
1756	/// updates the slice loop bounds with any non-null bound maps specified in
1757	/// 'sliceState', and inserts this slice into the loop nest surrounding
1758	/// 'dstOpInst' at loop depth 'dstLoopDepth'.
1759	// TODO: extend the slicing utility to compute slices that
1760	// aren't necessarily a one-to-one relation b/w the source and destination. The
1761	// relation between the source and destination could be many-to-many in general.
1762	// TODO: the slice computation is incorrect in the cases
1763	// where the dependence from the source to the destination does not cover the
1764	// entire destination index set. Subtract out the dependent destination
1765	// iterations from destination index set and check for emptiness --- this is one
1766	// solution.
1767	AffineForOp mlir::affine::insertBackwardComputationSlice(
1768	Operation srcOpInst, Operation dstOpInst, unsigned dstLoopDepth,
1769	ComputationSliceState *sliceState) {
1770	// Get loop nest surrounding src operation.
1771	SmallVector<AffineForOp, `4`> srcLoopIVs;
1772	getAffineForIVs(*srcOpInst, &srcLoopIVs);
1773	unsigned numSrcLoopIVs = srcLoopIVs.size();
1774
1775	// Get loop nest surrounding dst operation.
1776	SmallVector<AffineForOp, `4`> dstLoopIVs;
1777	getAffineForIVs(*dstOpInst, &dstLoopIVs);
1778	unsigned dstLoopIVsSize = dstLoopIVs.size();
1779	if (dstLoopDepth > dstLoopIVsSize) {
1780	dstOpInst->emitError(message: "invalid destination loop depth");
1781	return AffineForOp();
1782	}
1783
1784	// Find the op block positions of 'srcOpInst' within 'srcLoopIVs'.
1785	SmallVector<unsigned, `4`> positions;
1786	// TODO: This code is incorrect since srcLoopIVs can be 0-d.
1787	findInstPosition(srcOpInst, srcLoopIVs[`0`]->getBlock(), &positions);
1788
1789	// Clone src loop nest and insert it a the beginning of the operation block
1790	// of the loop at 'dstLoopDepth' in 'dstLoopIVs'.
1791	auto dstAffineForOp = dstLoopIVs[dstLoopDepth - `1`];
1792	OpBuilder b(dstAffineForOp.getBody(), dstAffineForOp.getBody()->begin());
1793	auto sliceLoopNest =
1794	cast<AffineForOp>(b.clone(*srcLoopIVs[`0`].getOperation()));
1795
1796	Operation *sliceInst =
1797	getInstAtPosition(positions, /level=/`0`, sliceLoopNest.getBody());
1798	// Get loop nest surrounding 'sliceInst'.
1799	SmallVector<AffineForOp, `4`> sliceSurroundingLoops;
1800	getAffineForIVs(*sliceInst, &sliceSurroundingLoops);
1801
1802	// Sanity check.
1803	unsigned sliceSurroundingLoopsSize = sliceSurroundingLoops.size();
1804	(void)sliceSurroundingLoopsSize;
1805	assert(dstLoopDepth + numSrcLoopIVs >= sliceSurroundingLoopsSize);
1806	unsigned sliceLoopLimit = dstLoopDepth + numSrcLoopIVs;
1807	(void)sliceLoopLimit;
1808	assert(sliceLoopLimit >= sliceSurroundingLoopsSize);
1809
1810	// Update loop bounds for loops in 'sliceLoopNest'.
1811	for (unsigned i = `0`; i < numSrcLoopIVs; ++i) {
1812	auto forOp = sliceSurroundingLoops[dstLoopDepth + i];
1813	if (AffineMap lbMap = sliceState->lbs [i])
1814	forOp.setLowerBound(sliceState->lbOperands [i], lbMap);
1815	if (AffineMap ubMap = sliceState->ubs [i])
1816	forOp.setUpperBound(sliceState->ubOperands [i], ubMap);
1817	}
1818	return sliceLoopNest;
1819	}
1820
1821	// Constructs MemRefAccess populating it with the memref, its indices and
1822	// opinst from 'loadOrStoreOpInst'.
1823	MemRefAccess::MemRefAccess(Operation *loadOrStoreOpInst) {
1824	if (auto loadOp = dyn_cast<AffineReadOpInterface>(loadOrStoreOpInst)) {
1825	memref = loadOp.getMemRef();
1826	opInst = loadOrStoreOpInst;
1827	llvm::append_range(indices, loadOp.getMapOperands());
1828	} else {
1829	assert(isa<AffineWriteOpInterface>(loadOrStoreOpInst) &&
1830	"Affine read/write op expected");
1831	auto storeOp = cast<AffineWriteOpInterface>(loadOrStoreOpInst);
1832	opInst = loadOrStoreOpInst;
1833	memref = storeOp.getMemRef();
1834	llvm::append_range(indices, storeOp.getMapOperands());
1835	}
1836	}
1837
1838	unsigned MemRefAccess::getRank() const {
1839	return cast<MemRefType>(memref.getType()).getRank();
1840	}
1841
1842	bool MemRefAccess::isStore() const {
1843	return isa<AffineWriteOpInterface>(opInst);
1844	}
1845
1846	/// Returns the nesting depth of this statement, i.e., the number of loops
1847	/// surrounding this statement.
1848	unsigned mlir::affine::getNestingDepth(Operation *op) {
1849	Operation *currOp = op;
1850	unsigned depth = `0`;
1851	while ((currOp = currOp->getParentOp())) {
1852	if (isa<AffineForOp>(Val: currOp))
1853	depth++;
1854	}
1855	return depth;
1856	}
1857
1858	/// Equal if both affine accesses are provably equivalent (at compile
1859	/// time) when considering the memref, the affine maps and their respective
1860	/// operands. The equality of access functions + operands is checked by
1861	/// subtracting fully composed value maps, and then simplifying the difference
1862	/// using the expression flattener.
1863	/// TODO: this does not account for aliasing of memrefs.
1864	bool MemRefAccess::operator==(const MemRefAccess &rhs) const {
1865	if (memref != rhs.memref)
1866	return false;
1867
1868	AffineValueMap diff, thisMap, rhsMap;
1869	getAccessMap(accessMap: &thisMap);
1870	rhs.getAccessMap(accessMap: &rhsMap);
1871	AffineValueMap::difference(a: thisMap, b: rhsMap, res: &diff);
1872	return llvm::all_of(Range: diff.getAffineMap().getResults(),
1873	P: [](AffineExpr e) { return e == `0`; });
1874	}
1875
1876	void mlir::affine::getAffineIVs(Operation &op, SmallVectorImpl<Value> &ivs) {
1877	auto *currOp = op.getParentOp();
1878	AffineForOp currAffineForOp;
1879	// Traverse up the hierarchy collecting all 'affine.for' and affine.parallel
1880	// operation while skipping over 'affine.if' operations.
1881	while (currOp) {
1882	if (AffineForOp currAffineForOp = dyn_cast<AffineForOp>(currOp))
1883	ivs.push_back(Elt: currAffineForOp.getInductionVar());
1884	else if (auto parOp = dyn_cast<AffineParallelOp>(currOp))
1885	llvm::append_range(ivs, parOp.getIVs());
1886	currOp = currOp->getParentOp();
1887	}
1888	std::reverse(first: ivs.begin(), last: ivs.end());
1889	}
1890
1891	/// Returns the number of surrounding loops common to 'loopsA' and 'loopsB',
1892	/// where each lists loops from outer-most to inner-most in loop nest.
1893	unsigned mlir::affine::getNumCommonSurroundingLoops(Operation &a,
1894	Operation &b) {
1895	SmallVector<Value, `4`> loopsA, loopsB;
1896	getAffineIVs(op&: a, ivs&: loopsA);
1897	getAffineIVs(op&: b, ivs&: loopsB);
1898
1899	unsigned minNumLoops = std::min(a: loopsA.size(), b: loopsB.size());
1900	unsigned numCommonLoops = `0`;
1901	for (unsigned i = `0`; i < minNumLoops; ++i) {
1902	if (loopsA [i] != loopsB [i])
1903	break;
1904	++numCommonLoops;
1905	}
1906	return numCommonLoops;
1907	}
1908
1909	static std::optional<int64_t> getMemoryFootprintBytes(Block &block,
1910	Block::iterator start,
1911	Block::iterator end,
1912	int memorySpace) {
1913	SmallDenseMap<Value, std::unique_ptr<MemRefRegion>, `4`> regions;
1914
1915	// Walk this 'affine.for' operation to gather all memory regions.
1916	auto result = block.walk(begin: start, end, callback: [&](Operation *opInst) -> WalkResult {
1917	if (!isa<AffineReadOpInterface, AffineWriteOpInterface>(opInst)) {
1918	// Neither load nor a store op.
1919	return WalkResult::advance();
1920	}
1921
1922	// Compute the memref region symbolic in any IVs enclosing this block.
1923	auto region = std::make_unique<MemRefRegion>(args: opInst->getLoc());
1924	if (failed(
1925	result: region ->compute(op: opInst,
1926	/loopDepth=/getNestingDepth(op: &*block.begin())))) {
1927	return opInst->emitError(message: "error obtaining memory region\n");
1928	}
1929
1930	auto it = regions.find(Val: region ->memref);
1931	if (it == regions.end()) {
1932	regions [region ->memref] = std::move(region);
1933	} else if (failed(result: it ->second ->unionBoundingBox(other: *region))) {
1934	return opInst->emitWarning(
1935	message: "getMemoryFootprintBytes: unable to perform a union on a memory "
1936	"region");
1937	}
1938	return WalkResult::advance();
1939	});
1940	if (result.wasInterrupted())
1941	return std::nullopt;
1942
1943	int64_t totalSizeInBytes = `0`;
1944	for (const auto &region : regions) {
1945	std::optional<int64_t> size = region.second ->getRegionSize();
1946	if (!size.has_value())
1947	return std::nullopt;
1948	totalSizeInBytes += *size;
1949	}
1950	return totalSizeInBytes;
1951	}
1952
1953	std::optional<int64_t> mlir::affine::getMemoryFootprintBytes(AffineForOp forOp,
1954	int memorySpace) {
1955	auto *forInst = forOp.getOperation();
1956	return ::getMemoryFootprintBytes(
1957	block&: *forInst->getBlock(), start: Block::iterator(forInst),
1958	end: std::next(x: Block::iterator(forInst)), memorySpace);
1959	}
1960
1961	/// Returns whether a loop is parallel and contains a reduction loop.
1962	bool mlir::affine::isLoopParallelAndContainsReduction(AffineForOp forOp) {
1963	SmallVector<LoopReduction> reductions;
1964	if (!isLoopParallel(forOp, &reductions))
1965	return false;
1966	return !reductions.empty();
1967	}
1968
1969	/// Returns in 'sequentialLoops' all sequential loops in loop nest rooted
1970	/// at 'forOp'.
1971	void mlir::affine::getSequentialLoops(
1972	AffineForOp forOp, llvm::SmallDenseSet<Value, `8`> *sequentialLoops) {
1973	forOp->walk([&](Operation *op) {
1974	if (auto innerFor = dyn_cast<AffineForOp>(op))
1975	if (!isLoopParallel(innerFor))
1976	sequentialLoops->insert(innerFor.getInductionVar());
1977	});
1978	}
1979
1980	IntegerSet mlir::affine::simplifyIntegerSet(IntegerSet set) {
1981	FlatAffineValueConstraints fac(set);
1982	if (fac.isEmpty())
1983	return IntegerSet::getEmptySet(numDims: set.getNumDims(), numSymbols: set.getNumSymbols(),
1984	context: set.getContext());
1985	fac.removeTrivialRedundancy();
1986
1987	auto simplifiedSet = fac.getAsIntegerSet(context: set.getContext());
1988	assert(simplifiedSet && "guaranteed to succeed while roundtripping");
1989	return simplifiedSet;
1990	}
1991
1992	static void unpackOptionalValues(ArrayRef<std::optional<Value>> source,
1993	SmallVector<Value> &target) {
1994	target =
1995	llvm::to_vector<`4`>(Range: llvm::map_range(C&: source, F: [](std::optional<Value> val) {
1996	return val.has_value() ? *val : Value ();
1997	}));
1998	}
1999
2000	/// Bound an identifier `pos` in a given FlatAffineValueConstraints with
2001	/// constraints drawn from an affine map. Before adding the constraint, the
2002	/// dimensions/symbols of the affine map are aligned with `constraints`.
2003	/// `operands` are the SSA Value operands used with the affine map.
2004	/// Note: This function adds a new symbol column to the `constraints` for each
2005	/// dimension/symbol that exists in the affine map but not in `constraints`.
2006	static LogicalResult alignAndAddBound(FlatAffineValueConstraints &constraints,
2007	BoundType type, unsigned pos,
2008	AffineMap map, ValueRange operands) {
2009	SmallVector<Value> dims, syms, newSyms;
2010	unpackOptionalValues(source: constraints.getMaybeValues(kind: VarKind::SetDim), target&: dims);
2011	unpackOptionalValues(source: constraints.getMaybeValues(kind: VarKind::Symbol), target&: syms);
2012
2013	AffineMap alignedMap =
2014	alignAffineMapWithValues(map, operands, dims, syms, newSyms: &newSyms);
2015	for (unsigned i = syms.size(); i < newSyms.size(); ++i)
2016	constraints.appendSymbolVar(vals: newSyms [i]);
2017	return constraints.addBound(type, pos, boundMap: alignedMap);
2018	}
2019
2020	/// Add `val` to each result of `map`.
2021	static AffineMap addConstToResults(AffineMap map, int64_t val) {
2022	SmallVector<AffineExpr> newResults;
2023	for (AffineExpr r : map.getResults())
2024	newResults.push_back(Elt: r + val);
2025	return AffineMap::get(dimCount: map.getNumDims(), symbolCount: map.getNumSymbols(), results: newResults,
2026	context: map.getContext());
2027	}
2028
2029	// Attempt to simplify the given min/max operation by proving that its value is
2030	// bounded by the same lower and upper bound.
2031	//
2032	// Bounds are computed by FlatAffineValueConstraints. Invariants required for
2033	// finding/proving bounds should be supplied via `constraints`.
2034	//
2035	// 1. Add dimensions for `op` and `opBound` (lower or upper bound of `op`).
2036	// 2. Compute an upper bound of `op` (in case of `isMin`) or a lower bound (in
2037	// case of `!isMin`) and bind it to `opBound`. SSA values that are used in
2038	// `op` but are not part of `constraints`, are added as extra symbols.
2039	// 3. For each result of `op`: Add result as a dimension `r_i`. Prove that:
2040	// If `isMin`: r_i >= opBound*
2041	// If `isMax`: r_i <= opBound*
2042	// If this is the case, ub(op) == lb(op).
2043	// 4. Replace `op` with `opBound`.
2044	//
2045	// In summary, the following constraints are added throughout this function.
2046	// Note: `invar` are dimensions added by the caller to express the invariants.
2047	// (Showing only the case where `isMin`.)
2048	//
2049	// invar \| op \| opBound \| r_i \| extra syms... \| const \| eq/ineq
2050	// ------+-------+---------+-----+---------------+-------+-------------------
2051	// (various eq./ineq. constraining `invar`, added by the caller)
2052	// ... \| 0 \| 0 \| 0 \| 0 \| ... \| ...
2053	// ------+-------+---------+-----+---------------+-------+-------------------
2054	// (various ineq. constraining `op` in terms of `op` operands (`invar` and
2055	// extra `op` operands "extra syms" that are not in `invar`)).
2056	// ... \| -1 \| 0 \| 0 \| ... \| ... \| >= 0
2057	// ------+-------+---------+-----+---------------+-------+-------------------
2058	// (set `opBound` to `op` upper bound in terms of `invar` and "extra syms")
2059	// ... \| 0 \| -1 \| 0 \| ... \| ... \| = 0
2060	// ------+-------+---------+-----+---------------+-------+-------------------
2061	// (for each `op` map result r_i: set r_i to corresponding map result,
2062	// prove that r_i >= minOpUb via contradiction)
2063	// ... \| 0 \| 0 \| -1 \| ... \| ... \| = 0
2064	// 0 \| 0 \| 1 \| -1 \| 0 \| -1 \| >= 0
2065	//
2066	FailureOr<AffineValueMap> mlir::affine::simplifyConstrainedMinMaxOp(
2067	Operation *op, FlatAffineValueConstraints constraints) {
2068	bool isMin = isa<AffineMinOp>(Val: op);
2069	assert((isMin \|\| isa<AffineMaxOp>(op)) && "expect AffineMin/MaxOp");
2070	MLIRContext *ctx = op->getContext();
2071	Builder builder(ctx);
2072	AffineMap map =
2073	isMin ? cast<AffineMinOp>(op).getMap() : cast<AffineMaxOp>(op).getMap();
2074	ValueRange operands = op->getOperands();
2075	unsigned numResults = map.getNumResults();
2076
2077	// Add a few extra dimensions.
2078	unsigned dimOp = constraints.appendDimVar(); // `op`
2079	unsigned dimOpBound = constraints.appendDimVar(); // `op` lower/upper bound
2080	unsigned resultDimStart = constraints.appendDimVar(/num=/numResults);
2081
2082	// Add an inequality for each result expr_i of map:
2083	// isMin: op <= expr_i, !isMin: op >= expr_i
2084	auto boundType = isMin ? BoundType::UB : BoundType::LB;
2085	// Upper bounds are exclusive, so add 1. (`affine.min` ops are inclusive.)
2086	AffineMap mapLbUb = isMin ? addConstToResults(map, val: `1`) : map;
2087	if (failed(
2088	result: alignAndAddBound(constraints, type: boundType, pos: dimOp, map: mapLbUb, operands)))
2089	return failure();
2090
2091	// Try to compute a lower/upper bound for op, expressed in terms of the other
2092	// `dims` and extra symbols.
2093	SmallVector<AffineMap> opLb(`1`), opUb(`1`);
2094	constraints.getSliceBounds(offset: dimOp, num: `1`, context: ctx, lbMaps: &opLb, ubMaps: &opUb);
2095	AffineMap sliceBound = isMin ? opUb [`0`] : opLb [`0`];
2096	// TODO: `getSliceBounds` may return multiple bounds at the moment. This is
2097	// a TODO of `getSliceBounds` and not handled here.
2098	if (!sliceBound \|\| sliceBound.getNumResults() != `1`)
2099	return failure(); // No or multiple bounds found.
2100	// Recover the inclusive UB in the case of an `affine.min`.
2101	AffineMap boundMap = isMin ? addConstToResults(map: sliceBound, val: -`1`) : sliceBound;
2102
2103	// Add an equality: Set dimOpBound to computed bound.
2104	// Add back dimension for op. (Was removed by `getSliceBounds`.)
2105	AffineMap alignedBoundMap = boundMap.shiftDims(/shift=/`1`, /offset=/dimOp);
2106	if (failed(result: constraints.addBound(type: BoundType::EQ, pos: dimOpBound, boundMap: alignedBoundMap)))
2107	return failure();
2108
2109	// If the constraint system is empty, there is an inconsistency. (E.g., this
2110	// can happen if loop lb > ub.)
2111	if (constraints.isEmpty())
2112	return failure();
2113
2114	// In the case of `isMin` (`!isMin` is inversed):
2115	// Prove that each result of `map` has a lower bound that is equal to (or
2116	// greater than) the upper bound of `op` (`dimOpBound`). In that case, `op`
2117	// can be replaced with the bound. I.e., prove that for each result
2118	// expr_i (represented by dimension r_i):
2119	//
2120	// r_i >= opBound
2121	//
2122	// To prove this inequality, add its negation to the constraint set and prove
2123	// that the constraint set is empty.
2124	for (unsigned i = resultDimStart; i < resultDimStart + numResults; ++i) {
2125	FlatAffineValueConstraints newConstr(constraints);
2126
2127	// Add an equality: r_i = expr_i
2128	// Note: These equalities could have been added earlier and used to express
2129	// minOp <= expr_i. However, then we run the risk that `getSliceBounds`
2130	// computes minOpUb in terms of r_i dims, which is not desired.
2131	if (failed(result: alignAndAddBound(constraints&: newConstr, type: BoundType::EQ, pos: i,
2132	map: map.getSubMap(resultPos: {i - resultDimStart}), operands)))
2133	return failure();
2134
2135	// If `isMin`: Add inequality: r_i < opBound
2136	// equiv.: opBound - r_i - 1 >= 0
2137	// If `!isMin`: Add inequality: r_i > opBound
2138	// equiv.: -opBound + r_i - 1 >= 0
2139	SmallVector<int64_t> ineq(newConstr.getNumCols(), `0`);
2140	ineq [dimOpBound] = isMin ? `1` : -`1`;
2141	ineq [i] = isMin ? -`1` : `1`;
2142	ineq [newConstr.getNumCols() - `1`] = -`1`;
2143	newConstr.addInequality(inEq: ineq);
2144	if (!newConstr.isEmpty())
2145	return failure();
2146	}
2147
2148	// Lower and upper bound of `op` are equal. Replace `minOp` with its bound.
2149	AffineMap newMap = alignedBoundMap;
2150	SmallVector<Value> newOperands;
2151	unpackOptionalValues(source: constraints.getMaybeValues(), target&: newOperands);
2152	// If dims/symbols have known constant values, use those in order to simplify
2153	// the affine map further.
2154	for (int64_t i = `0`, e = constraints.getNumDimAndSymbolVars(); i < e; ++i) {
2155	// Skip unused operands and operands that are already constants.
2156	if (!newOperands [i] \|\| getConstantIntValue(ofr: newOperands [i]))
2157	continue;
2158	if (auto bound = constraints.getConstantBound64(type: BoundType::EQ, pos: i)) {
2159	AffineExpr expr =
2160	i < newMap.getNumDims()
2161	? builder.getAffineDimExpr(position: i)
2162	: builder.getAffineSymbolExpr(position: i - newMap.getNumDims());
2163	newMap = newMap.replace(expr, replacement: builder.getAffineConstantExpr(constant: *bound),
2164	numResultDims: newMap.getNumDims(), numResultSyms: newMap.getNumSymbols());
2165	}
2166	}
2167	affine::canonicalizeMapAndOperands(map: &newMap, operands: &newOperands);
2168	return AffineValueMap (newMap, newOperands);
2169	}
2170

source code of mlir/lib/Dialect/Affine/Analysis/Utils.cpp