1//===- EliminateBarriers.cpp - Eliminate extra barriers --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Barrier elimination pattern and pass. If a barrier does not enforce any
10// conflicting pair of memory effects, including a pair that is enforced by
11// another barrier, it is unnecessary and can be removed. Adapted from
12// "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
13// Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in
14// PPoPP 2023 and implementation in Polygeist.
15//
16//===----------------------------------------------------------------------===//
17
18#include "mlir/Dialect/Func/IR/FuncOps.h"
19#include "mlir/Dialect/GPU/IR/GPUDialect.h"
20#include "mlir/Dialect/GPU/Transforms/Passes.h"
21#include "mlir/Dialect/MemRef/IR/MemRef.h"
22#include "mlir/Dialect/SCF/IR/SCF.h"
23#include "mlir/Dialect/Vector/IR/VectorOps.h"
24#include "mlir/IR/Operation.h"
25#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
26#include "llvm/ADT/TypeSwitch.h"
27#include "llvm/Support/Debug.h"
28
29namespace mlir {
30#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
31#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
32} // namespace mlir
33
34using namespace mlir;
35using namespace mlir::gpu;
36
#define DEBUG_TYPE "gpu-erase-barriers"
// NOTE: previously misspelled as "gpu-erase-barries-alias"; fixed so the
// -debug-only category matches the main one. Anyone relying on the old
// (misspelled) category must update their invocation.
#define DEBUG_TYPE_ALIAS "gpu-erase-barriers-alias"

#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
42
43// The functions below provide interface-like verification, but are too specific
44// to barrier elimination to become interfaces.
45
46/// Returns `true` if the op is defines the parallel region that is subject to
47/// barrier synchronization.
48static bool isParallelRegionBoundary(Operation *op) {
49 if (op->hasAttr(name: "__parallel_region_boundary_for_test"))
50 return true;
51
52 return isa<GPUFuncOp, LaunchOp>(Val: op);
53}
54
55/// Returns `true` if the op behaves like a sequential loop, e.g., the control
56/// flow "wraps around" from the end of the body region back to its start.
57static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(Val: op); }
58
59/// Returns `true` if the regions of the op are guaranteed to be executed at
60/// most once. Thus, if an operation in one of the nested regions of `op` is
61/// executed than so are all the other operations in this region.
62static bool hasSingleExecutionBody(Operation *op) {
63 return isa<FunctionOpInterface, scf::IfOp, memref::AllocaScopeOp>(Val: op);
64}
65
66/// Returns `true` if the operation is known to produce a pointer-like object
67/// distinct from any other object produced by a similar operation. For example,
68/// an allocation produces such an object.
69static bool producesDistinctBase(Operation *op) {
70 return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(Val: op);
71}
72
73/// Populates `effects` with all memory effects without associating them to a
74/// specific value.
75static void addAllValuelessEffects(
76 SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
77 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Read>());
78 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Write>());
79 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Allocate>());
80 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Free>());
81}
82
83/// Collect the memory effects of the given op in 'effects'. Returns 'true' if
84/// it could extract the effect information from the op, otherwise returns
85/// 'false' and conservatively populates the list with all possible effects
86/// associated with no particular value or symbol.
87static bool
88collectEffects(Operation *op,
89 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
90 bool ignoreBarriers = true) {
91 // Skip over barriers to avoid infinite recursion (those barriers would ask
92 // this barrier again).
93 if (ignoreBarriers && isa<BarrierOp>(Val: op))
94 return true;
95
96 // Collect effect instances the operation. Note that the implementation of
97 // getEffects erases all effect instances that have the type other than the
98 // template parameter so we collect them first in a local buffer and then
99 // copy.
100 if (auto iface = dyn_cast<MemoryEffectOpInterface>(Val: op)) {
101 SmallVector<MemoryEffects::EffectInstance> localEffects;
102 iface.getEffects(effects&: localEffects);
103 llvm::append_range(C&: effects, R&: localEffects);
104 return true;
105 }
106 if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
107 for (auto &region : op->getRegions()) {
108 for (auto &block : region) {
109 for (auto &innerOp : block)
110 if (!collectEffects(op: &innerOp, effects, ignoreBarriers))
111 return false;
112 }
113 }
114 return true;
115 }
116
117 // We need to be conservative here in case the op doesn't have the interface
118 // and assume it can have any possible effect.
119 addAllValuelessEffects(effects);
120 return false;
121}
122
123/// Get all effects before the given operation caused by other operations in the
124/// same block. That is, this will not consider operations beyond the block.
125static bool
126getEffectsBeforeInBlock(Operation *op,
127 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
128 bool stopAtBarrier) {
129 if (op == &op->getBlock()->front())
130 return true;
131
132 for (Operation *it = op->getPrevNode(); it != nullptr;
133 it = it->getPrevNode()) {
134 if (isa<BarrierOp>(Val: it)) {
135 if (stopAtBarrier)
136 return true;
137 continue;
138 }
139
140 if (!collectEffects(op: it, effects))
141 return false;
142 }
143 return true;
144}
145
146/// Collects memory effects from operations that may be executed before `op` in
147/// a trivial structured control flow, e.g., without branches. Stops at the
148/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
149/// set. Returns `true` if the memory effects added to `effects` are exact,
150/// `false` if they are a conservative over-approximation. The latter means that
151/// `effects` contain instances not associated with a specific value.
152static bool
153getEffectsBefore(Operation *op,
154 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
155 bool stopAtBarrier) {
156 if (!op->getBlock())
157 return true;
158
159 // If there is a non-structured control flow, bail.
160 Region *region = op->getBlock()->getParent();
161 if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
162 addAllValuelessEffects(effects);
163 return false;
164 }
165
166 // Collect all effects before the op.
167 getEffectsBeforeInBlock(op, effects, stopAtBarrier);
168
169 // Stop if reached the parallel region boundary.
170 if (isParallelRegionBoundary(op: op->getParentOp()))
171 return true;
172
173 Operation *parent = op->getParentOp();
174 // Otherwise, keep collecting above the parent operation.
175 if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
176 !getEffectsBefore(op: parent, effects, stopAtBarrier))
177 return false;
178
179 // If the op is loop-like, collect effects from the trailing operations until
180 // we hit a barrier because they can executed before the current operation by
181 // the previous iteration of this loop. For example, in the following loop
182 //
183 // for i = ... {
184 // op1
185 // ...
186 // barrier
187 // op2
188 // }
189 //
190 // the operation `op2` at iteration `i` is known to be executed before the
191 // operation `op1` at iteration `i+1` and the side effects must be ordered
192 // appropriately.
193 if (isSequentialLoopLike(op: parent)) {
194 // Assuming loop terminators have no side effects.
195 return getEffectsBeforeInBlock(op: op->getBlock()->getTerminator(), effects,
196 /*stopAtBarrier=*/true);
197 }
198
199 // If the parent operation is not guaranteed to execute its (single-block)
200 // region once, walk the block.
201 bool conservative = false;
202 if (!hasSingleExecutionBody(op: op->getParentOp()))
203 op->getParentOp()->walk(callback: [&](Operation *in) {
204 if (conservative)
205 return WalkResult::interrupt();
206 if (!collectEffects(op: in, effects)) {
207 conservative = true;
208 return WalkResult::interrupt();
209 }
210 return WalkResult::advance();
211 });
212
213 return !conservative;
214}
215
216/// Get all effects after the given operation caused by other operations in the
217/// same block. That is, this will not consider operations beyond the block.
218static bool
219getEffectsAfterInBlock(Operation *op,
220 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
221 bool stopAtBarrier) {
222 if (op == &op->getBlock()->back())
223 return true;
224
225 for (Operation *it = op->getNextNode(); it != nullptr;
226 it = it->getNextNode()) {
227 if (isa<BarrierOp>(Val: it)) {
228 if (stopAtBarrier)
229 return true;
230 continue;
231 }
232 if (!collectEffects(op: it, effects))
233 return false;
234 }
235 return true;
236}
237
238/// Collects memory effects from operations that may be executed after `op` in
239/// a trivial structured control flow, e.g., without branches. Stops at the
240/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
241/// set. Returns `true` if the memory effects added to `effects` are exact,
242/// `false` if they are a conservative over-approximation. The latter means that
243/// `effects` contain instances not associated with a specific value.
244static bool
245getEffectsAfter(Operation *op,
246 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
247 bool stopAtBarrier) {
248 if (!op->getBlock())
249 return true;
250
251 // If there is a non-structured control flow, bail.
252 Region *region = op->getBlock()->getParent();
253 if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
254 addAllValuelessEffects(effects);
255 return false;
256 }
257
258 // Collect all effects after the op.
259 getEffectsAfterInBlock(op, effects, stopAtBarrier);
260
261 Operation *parent = op->getParentOp();
262 // Stop if reached the parallel region boundary.
263 if (isParallelRegionBoundary(op: parent))
264 return true;
265
266 // Otherwise, keep collecting below the parent operation.
267 // Don't look into, for example, neighboring functions
268 if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
269 !getEffectsAfter(op: parent, effects, stopAtBarrier))
270 return false;
271
272 // If the op is loop-like, collect effects from the leading operations until
273 // we hit a barrier because they can executed after the current operation by
274 // the next iteration of this loop. For example, in the following loop
275 //
276 // for i = ... {
277 // op1
278 // ...
279 // barrier
280 // op2
281 // }
282 //
283 // the operation `op1` at iteration `i` is known to be executed after the
284 // operation `op2` at iteration `i-1` and the side effects must be ordered
285 // appropriately.
286 if (isSequentialLoopLike(op: parent)) {
287 if (isa<BarrierOp>(Val: op->getBlock()->front()))
288 return true;
289
290 bool exact = collectEffects(op: &op->getBlock()->front(), effects);
291 return getEffectsAfterInBlock(op: &op->getBlock()->front(), effects,
292 /*stopAtBarrier=*/true) &&
293 exact;
294 }
295
296 // If the parent operation is not guaranteed to execute its (single-block)
297 // region once, walk the block.
298 bool conservative = false;
299 if (!hasSingleExecutionBody(op: op->getParentOp()))
300 op->getParentOp()->walk(callback: [&](Operation *in) {
301 if (conservative)
302 return WalkResult::interrupt();
303 if (!collectEffects(op: in, effects)) {
304 conservative = true;
305 return WalkResult::interrupt();
306 }
307 return WalkResult::advance();
308 });
309
310 return !conservative;
311}
312
313/// Looks through known "view-like" ops to find the base memref.
314static Value getBase(Value v) {
315 while (true) {
316 Operation *definingOp = v.getDefiningOp();
317 if (!definingOp)
318 break;
319
320 bool shouldContinue =
321 TypeSwitch<Operation *, bool>(v.getDefiningOp())
322 .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
323 caseFn: [&](auto op) {
324 v = op.getSource();
325 return true;
326 })
327 .Case<memref::TransposeOp>(caseFn: [&](auto op) {
328 v = op.getIn();
329 return true;
330 })
331 .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>(caseFn: [&](auto op) {
332 v = op.getSrc();
333 return true;
334 })
335 .Default(defaultFn: [](Operation *) { return false; });
336 if (!shouldContinue)
337 break;
338 }
339 return v;
340}
341
342/// Returns `true` if the value is defined as a function argument.
343static bool isFunctionArgument(Value v) {
344 auto arg = dyn_cast<BlockArgument>(Val&: v);
345 return arg && isa<FunctionOpInterface>(Val: arg.getOwner()->getParentOp());
346}
347
348/// Returns the operand that the operation "propagates" through it for capture
349/// purposes. That is, if the value produced by this operation is captured, then
350/// so is the returned value.
351static Value propagatesCapture(Operation *op) {
352 return llvm::TypeSwitch<Operation *, Value>(op)
353 .Case(
354 caseFn: [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
355 .Case(caseFn: [](CastOpInterface castLike) { return castLike->getOperand(idx: 0); })
356 .Case(caseFn: [](memref::TransposeOp transpose) { return transpose.getIn(); })
357 .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>(
358 caseFn: [](auto op) { return op.getSrc(); })
359 .Default(defaultFn: [](Operation *) { return Value(); });
360}
361
362/// Returns `true` if the given operation is known to capture the given value,
363/// `false` if it is known not to capture the given value, `nullopt` if neither
364/// is known.
365static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
366 return llvm::TypeSwitch<Operation *, std::optional<bool>>(op)
367 // Store-like operations don't capture the destination, but do capture
368 // the value.
369 .Case<memref::StoreOp, vector::TransferWriteOp>(
370 caseFn: [&](auto op) { return op.getValue() == v; })
371 .Case<vector::StoreOp, vector::MaskedStoreOp>(
372 caseFn: [&](auto op) { return op.getValueToStore() == v; })
373 // These operations are known not to capture.
374 .Case(caseFn: [](memref::DeallocOp) { return false; })
375 // By default, we don't know anything.
376 .Default(defaultFn: [](Operation *) { return std::nullopt; });
377}
378
379/// Returns `true` if the value may be captured by any of its users, i.e., if
380/// the user may be storing this value into memory. This makes aliasing analysis
381/// more conservative as it cannot assume the pointer-like value is only passed
382/// around through SSA use-def.
383static bool maybeCaptured(Value v) {
384 SmallVector<Value> todo = {v};
385 while (!todo.empty()) {
386 Value v = todo.pop_back_val();
387 for (Operation *user : v.getUsers()) {
388 // A user that is known to only read cannot capture.
389 auto iface = dyn_cast<MemoryEffectOpInterface>(Val: user);
390 if (iface) {
391 SmallVector<MemoryEffects::EffectInstance> effects;
392 iface.getEffects(effects);
393 if (llvm::all_of(Range&: effects,
394 P: [](const MemoryEffects::EffectInstance &effect) {
395 return isa<MemoryEffects::Read>(Val: effect.getEffect());
396 })) {
397 continue;
398 }
399 }
400
401 // When an operation is known to create an alias, consider if the
402 // source is captured as well.
403 if (Value v = propagatesCapture(op: user)) {
404 todo.push_back(Elt: v);
405 continue;
406 }
407
408 std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(op: user, v);
409 if (!knownCaptureStatus || *knownCaptureStatus)
410 return true;
411 }
412 }
413
414 return false;
415}
416
417/// Returns true if two values may be referencing aliasing memory. This is a
418/// rather naive and conservative analysis. Values defined by different
419/// allocation-like operations as well as values derived from those by casts and
420/// views cannot alias each other. Similarly, values defined by allocations
421/// inside a function cannot alias function arguments. Global values cannot
422/// alias each other or local allocations. Values that are captured, i.e.
423/// themselves potentially stored in memory, are considered as aliasing with
424/// everything. This seems sufficient to achieve barrier removal in structured
425/// control flow, more complex cases would require a proper dataflow analysis.
426static bool mayAlias(Value first, Value second) {
427 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
428 DBGS_ALIAS() << "checking aliasing between ";
429 DBGS_ALIAS() << first << "\n";
430 DBGS_ALIAS() << " and ";
431 DBGS_ALIAS() << second << "\n";
432 });
433
434 first = getBase(v: first);
435 second = getBase(v: second);
436
437 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
438 DBGS_ALIAS() << "base ";
439 DBGS_ALIAS() << first << "\n";
440 DBGS_ALIAS() << " and ";
441 DBGS_ALIAS() << second << "\n";
442 });
443
444 // Values derived from the same base memref do alias (unless we do a more
445 // advanced analysis to prove non-overlapping accesses).
446 if (first == second) {
447 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
448 return true;
449 }
450
451 // Different globals cannot alias.
452 if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
453 if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
454 return globFirst.getNameAttr() == globSecond.getNameAttr();
455 }
456 }
457
458 // Two function arguments marked as noalias do not alias.
459 auto isNoaliasFuncArgument = [](Value value) {
460 auto bbArg = dyn_cast<BlockArgument>(Val&: value);
461 if (!bbArg)
462 return false;
463 auto iface = dyn_cast<FunctionOpInterface>(Val: bbArg.getOwner()->getParentOp());
464 if (!iface)
465 return false;
466 // TODO: we need a way to not depend on the LLVM dialect here.
467 return iface.getArgAttr(index: bbArg.getArgNumber(), name: "llvm.noalias") != nullptr;
468 };
469 if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
470 return false;
471
472 bool isDistinct[] = {producesDistinctBase(op: first.getDefiningOp()),
473 producesDistinctBase(op: second.getDefiningOp())};
474 bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
475 second.getDefiningOp<memref::GetGlobalOp>() != nullptr};
476
477 // Non-equivalent distinct bases and globals cannot alias. At this point, we
478 // have already filtered out based on values being equal and global name being
479 // equal.
480 if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
481 return false;
482
483 bool isArg[] = {isFunctionArgument(v: first), isFunctionArgument(v: second)};
484
485 // Distinct bases (allocations) cannot have been passed as an argument.
486 if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
487 return false;
488
489 // Non-captured base distinct values cannot conflict with another base value.
490 if (isDistinct[0] && !maybeCaptured(v: first))
491 return false;
492 if (isDistinct[1] && !maybeCaptured(v: second))
493 return false;
494
495 // Otherwise, conservatively assume aliasing.
496 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
497 return true;
498}
499
500/// Returns `true` if the effect may be affecting memory aliasing the value. If
501/// the effect is not associated with any value, it is assumed to affect all
502/// memory and therefore aliases with everything.
503static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) {
504 if (Value v = a.getValue()) {
505 return mayAlias(first: v, second: v2);
506 }
507 return true;
508}
509
510/// Returns `true` if the two effects may be affecting aliasing memory. If
511/// an effect is not associated with any value, it is assumed to affect all
512/// memory and therefore aliases with everything. Effects on different resources
513/// cannot alias.
514static bool mayAlias(MemoryEffects::EffectInstance a,
515 MemoryEffects::EffectInstance b) {
516 if (a.getResource()->getResourceID() != b.getResource()->getResourceID())
517 return false;
518 if (Value v2 = b.getValue()) {
519 return mayAlias(a, v2);
520 } else if (Value v = a.getValue()) {
521 return mayAlias(a: b, v2: v);
522 }
523 return true;
524}
525
526/// Returns `true` if any of the "before" effect instances has a conflict with
527/// any "after" instance for the purpose of barrier elimination. The effects are
528/// supposed to be limited to a barrier synchronization scope. A conflict exists
529/// if effects instances affect aliasing memory locations and at least on of
530/// then as a write. As an exception, if the non-write effect is an allocation
531/// effect, there is no conflict since we are only expected to see the
532/// allocation happening in the same thread and it cannot be accessed from
533/// another thread without capture (which we do handle in alias analysis).
534static bool
535haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
536 ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
537 for (const MemoryEffects::EffectInstance &before : beforeEffects) {
538 for (const MemoryEffects::EffectInstance &after : afterEffects) {
539 // If cannot alias, definitely no conflict.
540 if (!mayAlias(a: before, b: after))
541 continue;
542
543 // Read/read is not a conflict.
544 if (isa<MemoryEffects::Read>(Val: before.getEffect()) &&
545 isa<MemoryEffects::Read>(Val: after.getEffect())) {
546 continue;
547 }
548
549 // Allocate/* is not a conflict since the allocation happens within the
550 // thread context.
551 // TODO: This is not the case for */Free unless the allocation happened in
552 // the thread context, which we could also check for.
553 if (isa<MemoryEffects::Allocate>(Val: before.getEffect()) ||
554 isa<MemoryEffects::Allocate>(Val: after.getEffect())) {
555 continue;
556 }
557
558 // In the particular case that the before effect is a free, we only have 2
559 // possibilities:
560 // 1. either the program is well-formed and there must be an interleaved
561 // alloc that must limit the scope of effect lookback and we can
562 // safely ignore the free -> read / free -> write and free -> free
563 // conflicts.
564 // 2. either the program is ill-formed and we are in undefined behavior
565 // territory.
566 if (isa<MemoryEffects::Free>(Val: before.getEffect()))
567 continue;
568
569 // Other kinds of effects create a conflict, e.g. read-after-write.
570 LLVM_DEBUG(
571 DBGS() << "found a conflict between (before): " << before.getValue()
572 << " read:" << isa<MemoryEffects::Read>(before.getEffect())
573 << " write:" << isa<MemoryEffects::Write>(before.getEffect())
574 << " alloc:"
575 << isa<MemoryEffects::Allocate>(before.getEffect()) << " free:"
576 << isa<MemoryEffects::Free>(before.getEffect()) << "\n");
577 LLVM_DEBUG(
578 DBGS() << "and (after): " << after.getValue()
579 << " read:" << isa<MemoryEffects::Read>(after.getEffect())
580 << " write:" << isa<MemoryEffects::Write>(after.getEffect())
581 << " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
582 << " free:" << isa<MemoryEffects::Free>(after.getEffect())
583 << "\n");
584 return true;
585 }
586 }
587
588 return false;
589}
590
591namespace {
592class BarrierElimination final : public OpRewritePattern<BarrierOp> {
593public:
594 using OpRewritePattern<BarrierOp>::OpRewritePattern;
595
596 LogicalResult matchAndRewrite(BarrierOp barrier,
597 PatternRewriter &rewriter) const override {
598 LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " "
599 << barrier.getLoc() << "\n");
600
601 SmallVector<MemoryEffects::EffectInstance> beforeEffects;
602 getEffectsBefore(op: barrier, effects&: beforeEffects, /*stopAtBarrier=*/true);
603
604 SmallVector<MemoryEffects::EffectInstance> afterEffects;
605 getEffectsAfter(op: barrier, effects&: afterEffects, /*stopAtBarrier=*/true);
606
607 if (!haveConflictingEffects(beforeEffects, afterEffects)) {
608 LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing "
609 << barrier << "\n");
610 rewriter.eraseOp(op: barrier);
611 return success();
612 }
613
614 LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " "
615 << barrier.getLoc() << "\n");
616 return failure();
617 }
618};
619
620class GpuEliminateBarriersPass
621 : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
622 void runOnOperation() override {
623 auto funcOp = getOperation();
624 RewritePatternSet patterns(&getContext());
625 mlir::populateGpuEliminateBarriersPatterns(patterns);
626 if (failed(Result: applyPatternsGreedily(op: funcOp, patterns: std::move(patterns)))) {
627 return signalPassFailure();
628 }
629 }
630};
631
632} // namespace
633
634void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
635 patterns.insert<BarrierElimination>(arg: patterns.getContext());
636}
637

// Source: mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp