EliminateBarriers.cpp source code [mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp]

1	//===- EliminateBarriers.cpp - Eliminate extra barriers --===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// Barrier elimination pattern and pass. If a barrier does not enforce any
10	// conflicting pair of memory effects, including a pair that is enforced by
11	// another barrier, it is unnecessary and can be removed. Adapted from
12	// "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
13	// Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in
14	// PPoPP 2023 and implementation in Polygeist.
15	//
16	//===----------------------------------------------------------------------===//
17
18	#include "mlir/Dialect/Func/IR/FuncOps.h"
19	#include "mlir/Dialect/GPU/IR/GPUDialect.h"
20	#include "mlir/Dialect/GPU/Transforms/Passes.h"
21	#include "mlir/Dialect/MemRef/IR/MemRef.h"
22	#include "mlir/Dialect/SCF/IR/SCF.h"
23	#include "mlir/Dialect/Vector/IR/VectorOps.h"
24	#include "mlir/IR/Operation.h"
25	#include "mlir/Pass/Pass.h"
26	#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
27	#include "llvm/ADT/TypeSwitch.h"
28	#include "llvm/Support/Debug.h"
29
30	namespace mlir {
31	#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
32	#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
33	} // namespace mlir
34
35	using namespace mlir;
36	using namespace mlir::gpu;
37
38	#define DEBUG_TYPE "gpu-erase-barriers"
39	#define DEBUG_TYPE_ALIAS "gpu-erase-barries-alias"
40
41	#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
42	#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
43
44	// The functions below provide interface-like verification, but are too specific
45	// to barrier elimination to become interfaces.
46
47	/// Implement the MemoryEffectsOpInterface in the suitable way.
48	static bool isKnownNoEffectsOpWithoutInterface(Operation *op) {
49	// memref::AssumeAlignment is conceptually pure, but marking it as such would
50	// make DCE immediately remove it.
51	return isa<memref::AssumeAlignmentOp>(op);
52	}
53
54	/// Returns `true` if the op is defines the parallel region that is subject to
55	/// barrier synchronization.
56	static bool isParallelRegionBoundary(Operation *op) {
57	if (op->hasAttr(name: "__parallel_region_boundary_for_test"))
58	return true;
59
60	return isa<GPUFuncOp, LaunchOp>(Val: op);
61	}
62
63	/// Returns `true` if the op behaves like a sequential loop, e.g., the control
64	/// flow "wraps around" from the end of the body region back to its start.
65	static bool isSequentialLoopLike(Operation op) { return* isa<scf::ForOp>(op); }
66
67	/// Returns `true` if the regions of the op are guaranteed to be executed at
68	/// most once. Thus, if an operation in one of the nested regions of `op` is
69	/// executed than so are all the other operations in this region.
70	static bool hasSingleExecutionBody(Operation *op) {
71	return isa<scf::IfOp, memref::AllocaScopeOp>(op);
72	}
73
74	/// Returns `true` if the operation is known to produce a pointer-like object
75	/// distinct from any other object produced by a similar operation. For example,
76	/// an allocation produces such an object.
77	static bool producesDistinctBase(Operation *op) {
78	return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
79	}
80
81	/// Populates `effects` with all memory effects without associating them to a
82	/// specific value.
83	static void addAllValuelessEffects(
84	SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
85	effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Read>());
86	effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Write>());
87	effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Allocate>());
88	effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Free>());
89	}
90
91	/// Collect the memory effects of the given op in 'effects'. Returns 'true' if
92	/// it could extract the effect information from the op, otherwise returns
93	/// 'false' and conservatively populates the list with all possible effects
94	/// associated with no particular value or symbol.
95	static bool
96	collectEffects(Operation *op,
97	SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
98	bool ignoreBarriers = true) {
99	// Skip over barriers to avoid infinite recursion (those barriers would ask
100	// this barrier again).
101	if (ignoreBarriers && isa<BarrierOp>(op))
102	return true;
103
104	// Skip over ops that we know have no effects.
105	if (isKnownNoEffectsOpWithoutInterface(op))
106	return true;
107
108	// Collect effect instances the operation. Note that the implementation of
109	// getEffects erases all effect instances that have the type other than the
110	// template parameter so we collect them first in a local buffer and then
111	// copy.
112	if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
113	SmallVector<MemoryEffects::EffectInstance> localEffects;
114	iface.getEffects(localEffects);
115	llvm::append_range(C&: effects, R&: localEffects);
116	return true;
117	}
118	if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
119	for (auto &region : op->getRegions()) {
120	for (auto &block : region) {
121	for (auto &innerOp : block)
122	if (!collectEffects(op: &innerOp, effects, ignoreBarriers))
123	return false;
124	}
125	}
126	return true;
127	}
128
129	// We need to be conservative here in case the op doesn't have the interface
130	// and assume it can have any possible effect.
131	addAllValuelessEffects(effects);
132	return false;
133	}
134
135	/// Collects memory effects from operations that may be executed before `op` in
136	/// a trivial structured control flow, e.g., without branches. Stops at the
137	/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
138	/// set. Returns `true` if the memory effects added to `effects` are exact,
139	/// `false` if they are a conservative over-approximation. The latter means that
140	/// `effects` contain instances not associated with a specific value.
141	static bool
142	getEffectsBefore(Operation *op,
143	SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
144	bool stopAtBarrier) {
145	if (!op->getBlock())
146	return true;
147
148	// If there is a non-structured control flow, bail.
149	Region *region = op->getBlock()->getParent();
150	if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
151	addAllValuelessEffects(effects);
152	return false;
153	}
154
155	// Collect all effects before the op.
156	if (op != &op->getBlock()->front()) {
157	for (Operation it = op->getPrevNode(); it != nullptr*;
158	it = it->getPrevNode()) {
159	if (isa<BarrierOp>(it)) {
160	if (stopAtBarrier)
161	return true;
162	else
163	continue;
164	}
165	if (!collectEffects(op: it, effects))
166	return false;
167	}
168	}
169
170	// Stop if reached the parallel region boundary.
171	if (isParallelRegionBoundary(op: op->getParentOp()))
172	return true;
173
174	// Otherwise, keep collecting above the parent operation.
175	if (!getEffectsBefore(op: op->getParentOp(), effects, stopAtBarrier))
176	return false;
177
178	// If the op is loop-like, collect effects from the trailing operations until
179	// we hit a barrier because they can executed before the current operation by
180	// the previous iteration of this loop. For example, in the following loop
181	//
182	// for i = ... {
183	// op1
184	// ...
185	// barrier
186	// op2
187	// }
188	//
189	// the operation `op2` at iteration `i` is known to be executed before the
190	// operation `op1` at iteration `i+1` and the side effects must be ordered
191	// appropriately.
192	if (isSequentialLoopLike(op: op->getParentOp())) {
193	// Assuming loop terminators have no side effects.
194	return getEffectsBefore(op: op->getBlock()->getTerminator(), effects,
195	/stopAtBarrier=/true);
196	}
197
198	// If the parent operation is not guaranteed to execute its (single-block)
199	// region once, walk the block.
200	bool conservative = false;
201	if (!hasSingleExecutionBody(op: op->getParentOp()))
202	op->getParentOp()->walk(callback: [&](Operation *in) {
203	if (conservative)
204	return WalkResult::interrupt();
205	if (!collectEffects(op: in, effects)) {
206	conservative = true;
207	return WalkResult::interrupt();
208	}
209	return WalkResult::advance();
210	});
211
212	return !conservative;
213	}
214
215	/// Collects memory effects from operations that may be executed after `op` in
216	/// a trivial structured control flow, e.g., without branches. Stops at the
217	/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
218	/// set. Returns `true` if the memory effects added to `effects` are exact,
219	/// `false` if they are a conservative over-approximation. The latter means that
220	/// `effects` contain instances not associated with a specific value.
221	static bool
222	getEffectsAfter(Operation *op,
223	SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
224	bool stopAtBarrier) {
225	if (!op->getBlock())
226	return true;
227
228	// If there is a non-structured control flow, bail.
229	Region *region = op->getBlock()->getParent();
230	if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
231	addAllValuelessEffects(effects);
232	return false;
233	}
234
235	// Collect all effects after the op.
236	if (op != &op->getBlock()->back())
237	for (Operation it = op->getNextNode(); it != nullptr*;
238	it = it->getNextNode()) {
239	if (isa<BarrierOp>(it)) {
240	if (stopAtBarrier)
241	return true;
242	continue;
243	}
244	if (!collectEffects(op: it, effects))
245	return false;
246	}
247
248	// Stop if reached the parallel region boundary.
249	if (isParallelRegionBoundary(op: op->getParentOp()))
250	return true;
251
252	// Otherwise, keep collecting below the parent operation.
253	if (!getEffectsAfter(op: op->getParentOp(), effects, stopAtBarrier))
254	return false;
255
256	// If the op is loop-like, collect effects from the leading operations until
257	// we hit a barrier because they can executed after the current operation by
258	// the next iteration of this loop. For example, in the following loop
259	//
260	// for i = ... {
261	// op1
262	// ...
263	// barrier
264	// op2
265	// }
266	//
267	// the operation `op1` at iteration `i` is known to be executed after the
268	// operation `op2` at iteration `i-1` and the side effects must be ordered
269	// appropriately.
270	if (isSequentialLoopLike(op: op->getParentOp())) {
271	if (isa<BarrierOp>(op->getBlock()->front()))
272	return true;
273
274	bool exact = collectEffects(op: &op->getBlock()->front(), effects);
275	return getEffectsAfter(op: &op->getBlock()->front(), effects,
276	/stopAtBarrier=/true) &&
277	exact;
278	}
279
280	// If the parent operation is not guaranteed to execute its (single-block)
281	// region once, walk the block.
282	bool conservative = false;
283	if (!hasSingleExecutionBody(op: op->getParentOp()))
284	op->getParentOp()->walk(callback: [&](Operation *in) {
285	if (conservative)
286	return WalkResult::interrupt();
287	if (!collectEffects(op: in, effects)) {
288	conservative = true;
289	return WalkResult::interrupt();
290	}
291	return WalkResult::advance();
292	});
293
294	return !conservative;
295	}
296
297	/// Looks through known "view-like" ops to find the base memref.
298	static Value getBase(Value v) {
299	while (true) {
300	Operation *definingOp = v.getDefiningOp();
301	if (!definingOp)
302	break;
303
304	bool shouldContinue =
305	TypeSwitch<Operation , bool*>(v.getDefiningOp())
306	.Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
307	[&](auto op) {
308	v = op.getSource();
309	return true;
310	})
311	.Case<memref::TransposeOp>([&](auto op) {
312	v = op.getIn();
313	return true;
314	})
315	.Case<memref::CollapseShapeOp, memref::ExpandShapeOp>([&](auto op) {
316	v = op.getSrc();
317	return true;
318	})
319	.Default([](Operation ) { return* false; });
320	if (!shouldContinue)
321	break;
322	}
323	return v;
324	}
325
326	/// Returns `true` if the value is defined as a function argument.
327	static bool isFunctionArgument(Value v) {
328	auto arg = dyn_cast<BlockArgument>(Val&: v);
329	return arg && isa<FunctionOpInterface>(Val: arg.getOwner()->getParentOp());
330	}
331
332	/// Returns the operand that the operation "propagates" through it for capture
333	/// purposes. That is, if the value produced by this operation is captured, then
334	/// so is the returned value.
335	static Value propagatesCapture(Operation *op) {
336	return llvm::TypeSwitch<Operation *, Value>(op)
337	.Case(
338	[](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
339	.Case([](CastOpInterface castLike) { return castLike->getOperand(`0`); })
340	.Case([](memref::TransposeOp transpose) { return transpose.getIn(); })
341	.Case<memref::ExpandShapeOp, memref::CollapseShapeOp>(
342	[](auto op) { return op.getSrc(); })
343	.Default([](Operation ) { return* Value(); });
344	}
345
346	/// Returns `true` if the given operation is known to capture the given value,
347	/// `false` if it is known not to capture the given value, `nullopt` if neither
348	/// is known.
349	static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
350	return llvm::TypeSwitch<Operation , std::optional<bool*>>(op)
351	// Store-like operations don't capture the destination, but do capture
352	// the value.
353	.Case<memref::StoreOp, vector::TransferWriteOp>(
354	[&](auto op) { return op.getValue() == v; })
355	.Case<vector::StoreOp, vector::MaskedStoreOp>(
356	[&](auto op) { return op.getValueToStore() == v; })
357	// These operations are known not to capture.
358	.Case([](memref::DeallocOp) { return false; })
359	// By default, we don't know anything.
360	.Default([](Operation ) { return* std::nullopt; });
361	}
362
363	/// Returns `true` if the value may be captured by any of its users, i.e., if
364	/// the user may be storing this value into memory. This makes aliasing analysis
365	/// more conservative as it cannot assume the pointer-like value is only passed
366	/// around through SSA use-def.
367	static bool maybeCaptured(Value v) {
368	SmallVector<Value> todo = {v};
369	while (!todo.empty()) {
370	Value v = todo.pop_back_val();
371	for (Operation *user : v.getUsers()) {
372	// A user that is known to only read cannot capture.
373	auto iface = dyn_cast<MemoryEffectOpInterface>(user);
374	if (iface) {
375	SmallVector<MemoryEffects::EffectInstance> effects;
376	iface.getEffects(effects);
377	if (llvm::all_of(Range&: effects,
378	P: [](const MemoryEffects::EffectInstance &effect) {
379	return isa<MemoryEffects::Read>(Val: effect.getEffect());
380	})) {
381	continue;
382	}
383	}
384
385	// When an operation is known to create an alias, consider if the
386	// source is captured as well.
387	if (Value v = propagatesCapture(op: user)) {
388	todo.push_back(Elt: v);
389	continue;
390	}
391
392	std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(op: user, v);
393	if (!knownCaptureStatus \|\| *knownCaptureStatus)
394	return true;
395	}
396	}
397
398	return false;
399	}
400
401	/// Returns true if two values may be referencing aliasing memory. This is a
402	/// rather naive and conservative analysis. Values defined by different
403	/// allocation-like operations as well as values derived from those by casts and
404	/// views cannot alias each other. Similarly, values defined by allocations
405	/// inside a function cannot alias function arguments. Global values cannot
406	/// alias each other or local allocations. Values that are captured, i.e.
407	/// themselves potentially stored in memory, are considered as aliasing with
408	/// everything. This seems sufficient to achieve barrier removal in structured
409	/// control flow, more complex cases would require a proper dataflow analysis.
410	static bool mayAlias(Value first, Value second) {
411	DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
412	DBGS_ALIAS() << "checking aliasing between ";
413	DBGS_ALIAS() << first << "\n";
414	DBGS_ALIAS() << " and ";
415	DBGS_ALIAS() << second << "\n";
416	});
417
418	first = getBase(v: first);
419	second = getBase(v: second);
420
421	DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
422	DBGS_ALIAS() << "base ";
423	DBGS_ALIAS() << first << "\n";
424	DBGS_ALIAS() << " and ";
425	DBGS_ALIAS() << second << "\n";
426	});
427
428	// Values derived from the same base memref do alias (unless we do a more
429	// advanced analysis to prove non-overlapping accesses).
430	if (first == second) {
431	DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
432	return true;
433	}
434
435	// Different globals cannot alias.
436	if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
437	if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
438	return globFirst.getNameAttr() == globSecond.getNameAttr();
439	}
440	}
441
442	// Two function arguments marked as noalias do not alias.
443	auto isNoaliasFuncArgument = [](Value value) {
444	auto bbArg = dyn_cast<BlockArgument>(Val&: value);
445	if (!bbArg)
446	return false;
447	auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
448	if (!iface)
449	return false;
450	// TODO: we need a way to not depend on the LLVM dialect here.
451	return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
452	};
453	if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
454	return false;
455
456	bool isDistinct[] = {producesDistinctBase(op: first.getDefiningOp()),
457	producesDistinctBase(op: second.getDefiningOp())};
458	bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
459	second.getDefiningOp<memref::GetGlobalOp>() != nullptr};
460
461	// Non-equivalent distinct bases and globals cannot alias. At this point, we
462	// have already filtered out based on values being equal and global name being
463	// equal.
464	if ((isDistinct[`0`] \|\| isGlobal[`0`]) && (isDistinct[`1`] \|\| isGlobal[`1`]))
465	return false;
466
467	bool isArg[] = {isFunctionArgument(v: first), isFunctionArgument(v: second)};
468
469	// Distinct bases (allocations) cannot have been passed as an argument.
470	if ((isDistinct[`0`] && isArg[`1`]) \|\| (isDistinct[`1`] && isArg[`0`]))
471	return false;
472
473	// Non-captured base distinct values cannot conflict with another base value.
474	if (isDistinct[`0`] && !maybeCaptured(v: first))
475	return false;
476	if (isDistinct[`1`] && !maybeCaptured(v: second))
477	return false;
478
479	// Otherwise, conservatively assume aliasing.
480	DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
481	return true;
482	}
483
484	/// Returns `true` if the effect may be affecting memory aliasing the value. If
485	/// the effect is not associated with any value, it is assumed to affect all
486	/// memory and therefore aliases with everything.
487	static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) {
488	if (Value v = a.getValue()) {
489	return mayAlias(first: v, second: v2);
490	}
491	return true;
492	}
493
494	/// Returns `true` if the two effects may be affecting aliasing memory. If
495	/// an effect is not associated with any value, it is assumed to affect all
496	/// memory and therefore aliases with everything. Effects on different resources
497	/// cannot alias.
498	static bool mayAlias(MemoryEffects::EffectInstance a,
499	MemoryEffects::EffectInstance b) {
500	if (a.getResource()->getResourceID() != b.getResource()->getResourceID())
501	return false;
502	if (Value v2 = b.getValue()) {
503	return mayAlias(a, v2);
504	} else if (Value v = a.getValue()) {
505	return mayAlias(a: b, v2: v);
506	}
507	return true;
508	}
509
510	/// Returns `true` if any of the "before" effect instances has a conflict with
511	/// any "after" instance for the purpose of barrier elimination. The effects are
512	/// supposed to be limited to a barrier synchronization scope. A conflict exists
513	/// if effects instances affect aliasing memory locations and at least on of
514	/// then as a write. As an exception, if the non-write effect is an allocation
515	/// effect, there is no conflict since we are only expected to see the
516	/// allocation happening in the same thread and it cannot be accessed from
517	/// another thread without capture (which we do handle in alias analysis).
518	static bool
519	haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
520	ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
521	for (const MemoryEffects::EffectInstance &before : beforeEffects) {
522	for (const MemoryEffects::EffectInstance &after : afterEffects) {
523	// If cannot alias, definitely no conflict.
524	if (!mayAlias(a: before, b: after))
525	continue;
526
527	// Read/read is not a conflict.
528	if (isa<MemoryEffects::Read>(Val: before.getEffect()) &&
529	isa<MemoryEffects::Read>(Val: after.getEffect())) {
530	continue;
531	}
532
533	// Allocate/ is not a conflict since the allocation happens within the*
534	// thread context.
535	// TODO: This is not the case for /Free unless the allocation happened in*
536	// the thread context, which we could also check for.
537	if (isa<MemoryEffects::Allocate>(Val: before.getEffect()) \|\|
538	isa<MemoryEffects::Allocate>(Val: after.getEffect())) {
539	continue;
540	}
541
542	// In the particular case that the before effect is a free, we only have 2
543	// possibilities:
544	// 1. either the program is well-formed and there must be an interleaved
545	// alloc that must limit the scope of effect lookback and we can
546	// safely ignore the free -> read / free -> write and free -> free
547	// conflicts.
548	// 2. either the program is ill-formed and we are in undefined behavior
549	// territory.
550	if (isa<MemoryEffects::Free>(Val: before.getEffect()))
551	continue;
552
553	// Other kinds of effects create a conflict, e.g. read-after-write.
554	LLVM_DEBUG(
555	DBGS() << "found a conflict between (before): " << before.getValue()
556	<< " read:" << isa<MemoryEffects::Read>(before.getEffect())
557	<< " write:" << isa<MemoryEffects::Write>(before.getEffect())
558	<< " alloc:"
559	<< isa<MemoryEffects::Allocate>(before.getEffect()) << " free:"
560	<< isa<MemoryEffects::Free>(before.getEffect()) << "\n");
561	LLVM_DEBUG(
562	DBGS() << "and (after): " << after.getValue()
563	<< " read:" << isa<MemoryEffects::Read>(after.getEffect())
564	<< " write:" << isa<MemoryEffects::Write>(after.getEffect())
565	<< " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
566	<< " free:" << isa<MemoryEffects::Free>(after.getEffect())
567	<< "\n");
568	return true;
569	}
570	}
571
572	return false;
573	}
574
575	namespace {
576	class BarrierElimination final : public OpRewritePattern<BarrierOp> {
577	public:
578	using OpRewritePattern<BarrierOp>::OpRewritePattern;
579
580	LogicalResult matchAndRewrite(BarrierOp barrier,
581	PatternRewriter &rewriter) const override {
582	LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " "
583	<< barrier.getLoc() << "\n");
584
585	SmallVector<MemoryEffects::EffectInstance> beforeEffects;
586	getEffectsBefore(barrier, beforeEffects, /stopAtBarrier=/true);
587
588	SmallVector<MemoryEffects::EffectInstance> afterEffects;
589	getEffectsAfter(barrier, afterEffects, /stopAtBarrier=/true);
590
591	if (!haveConflictingEffects(beforeEffects, afterEffects)) {
592	LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing "
593	<< barrier << "\n");
594	rewriter.eraseOp(op: barrier);
595	return success();
596	}
597
598	LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " "
599	<< barrier.getLoc() << "\n");
600	return failure();
601	}
602	};
603
604	class GpuEliminateBarriersPass
605	: public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
606	void runOnOperation() override {
607	auto funcOp = getOperation();
608	RewritePatternSet patterns(&getContext());
609	mlir::populateGpuEliminateBarriersPatterns(patterns);
610	if (failed(applyPatternsAndFoldGreedily(funcOp, std::move(patterns)))) {
611	return signalPassFailure();
612	}
613	}
614	};
615
616	} // namespace
617
618	void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
619	patterns.insert<BarrierElimination>(arg: patterns.getContext());
620	}
621

Provided by KDAB

Definitions

source code of mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp