1//===- EliminateBarriers.cpp - Eliminate extra barriers --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Barrier elimination pattern and pass. If a barrier does not enforce any
10// conflicting pair of memory effects, including a pair that is enforced by
11// another barrier, it is unnecessary and can be removed. Adapted from
12// "High-Performance GPU-to-CPU Transpilation and Optimization via High-Level
13// Parallel Constructs" by Moses, Ivanov, Domke, Endo, Doerfert, and Zinenko in
14// PPoPP 2023 and implementation in Polygeist.
15//
16//===----------------------------------------------------------------------===//
17
18#include "mlir/Dialect/Func/IR/FuncOps.h"
19#include "mlir/Dialect/GPU/IR/GPUDialect.h"
20#include "mlir/Dialect/GPU/Transforms/Passes.h"
21#include "mlir/Dialect/MemRef/IR/MemRef.h"
22#include "mlir/Dialect/SCF/IR/SCF.h"
23#include "mlir/Dialect/Vector/IR/VectorOps.h"
24#include "mlir/IR/Operation.h"
25#include "mlir/Pass/Pass.h"
26#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
27#include "llvm/ADT/TypeSwitch.h"
28#include "llvm/Support/Debug.h"
29
30namespace mlir {
31#define GEN_PASS_DEF_GPUELIMINATEBARRIERS
32#include "mlir/Dialect/GPU/Transforms/Passes.h.inc"
33} // namespace mlir
34
35using namespace mlir;
36using namespace mlir::gpu;
37
// Debug categories: general pass activity and alias-analysis details.
#define DEBUG_TYPE "gpu-erase-barriers"
#define DEBUG_TYPE_ALIAS "gpu-erase-barriers-alias"

#define DBGS() (llvm::dbgs() << '[' << DEBUG_TYPE << "] ")
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
43
44// The functions below provide interface-like verification, but are too specific
45// to barrier elimination to become interfaces.
46
47/// Returns `true` if the op is defines the parallel region that is subject to
48/// barrier synchronization.
49static bool isParallelRegionBoundary(Operation *op) {
50 if (op->hasAttr(name: "__parallel_region_boundary_for_test"))
51 return true;
52
53 return isa<GPUFuncOp, LaunchOp>(op);
54}
55
56/// Returns `true` if the op behaves like a sequential loop, e.g., the control
57/// flow "wraps around" from the end of the body region back to its start.
58static bool isSequentialLoopLike(Operation *op) { return isa<scf::ForOp>(op); }
59
60/// Returns `true` if the regions of the op are guaranteed to be executed at
61/// most once. Thus, if an operation in one of the nested regions of `op` is
62/// executed than so are all the other operations in this region.
63static bool hasSingleExecutionBody(Operation *op) {
64 return isa<FunctionOpInterface, scf::IfOp, memref::AllocaScopeOp>(op);
65}
66
67/// Returns `true` if the operation is known to produce a pointer-like object
68/// distinct from any other object produced by a similar operation. For example,
69/// an allocation produces such an object.
70static bool producesDistinctBase(Operation *op) {
71 return isa_and_nonnull<memref::AllocOp, memref::AllocaOp>(op);
72}
73
74/// Populates `effects` with all memory effects without associating them to a
75/// specific value.
76static void addAllValuelessEffects(
77 SmallVectorImpl<MemoryEffects::EffectInstance> &effects) {
78 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Read>());
79 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Write>());
80 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Allocate>());
81 effects.emplace_back(Args: MemoryEffects::Effect::get<MemoryEffects::Free>());
82}
83
84/// Collect the memory effects of the given op in 'effects'. Returns 'true' if
85/// it could extract the effect information from the op, otherwise returns
86/// 'false' and conservatively populates the list with all possible effects
87/// associated with no particular value or symbol.
88static bool
89collectEffects(Operation *op,
90 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
91 bool ignoreBarriers = true) {
92 // Skip over barriers to avoid infinite recursion (those barriers would ask
93 // this barrier again).
94 if (ignoreBarriers && isa<BarrierOp>(op))
95 return true;
96
97 // Collect effect instances the operation. Note that the implementation of
98 // getEffects erases all effect instances that have the type other than the
99 // template parameter so we collect them first in a local buffer and then
100 // copy.
101 if (auto iface = dyn_cast<MemoryEffectOpInterface>(op)) {
102 SmallVector<MemoryEffects::EffectInstance> localEffects;
103 iface.getEffects(localEffects);
104 llvm::append_range(C&: effects, R&: localEffects);
105 return true;
106 }
107 if (op->hasTrait<OpTrait::HasRecursiveMemoryEffects>()) {
108 for (auto &region : op->getRegions()) {
109 for (auto &block : region) {
110 for (auto &innerOp : block)
111 if (!collectEffects(op: &innerOp, effects, ignoreBarriers))
112 return false;
113 }
114 }
115 return true;
116 }
117
118 // We need to be conservative here in case the op doesn't have the interface
119 // and assume it can have any possible effect.
120 addAllValuelessEffects(effects);
121 return false;
122}
123
124/// Get all effects before the given operation caused by other operations in the
125/// same block. That is, this will not consider operations beyond the block.
126static bool
127getEffectsBeforeInBlock(Operation *op,
128 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
129 bool stopAtBarrier) {
130 if (op == &op->getBlock()->front())
131 return true;
132
133 for (Operation *it = op->getPrevNode(); it != nullptr;
134 it = it->getPrevNode()) {
135 if (isa<BarrierOp>(it)) {
136 if (stopAtBarrier)
137 return true;
138 continue;
139 }
140
141 if (!collectEffects(op: it, effects))
142 return false;
143 }
144 return true;
145}
146
147/// Collects memory effects from operations that may be executed before `op` in
148/// a trivial structured control flow, e.g., without branches. Stops at the
149/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
150/// set. Returns `true` if the memory effects added to `effects` are exact,
151/// `false` if they are a conservative over-approximation. The latter means that
152/// `effects` contain instances not associated with a specific value.
153static bool
154getEffectsBefore(Operation *op,
155 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
156 bool stopAtBarrier) {
157 if (!op->getBlock())
158 return true;
159
160 // If there is a non-structured control flow, bail.
161 Region *region = op->getBlock()->getParent();
162 if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
163 addAllValuelessEffects(effects);
164 return false;
165 }
166
167 // Collect all effects before the op.
168 getEffectsBeforeInBlock(op, effects, stopAtBarrier);
169
170 // Stop if reached the parallel region boundary.
171 if (isParallelRegionBoundary(op: op->getParentOp()))
172 return true;
173
174 Operation *parent = op->getParentOp();
175 // Otherwise, keep collecting above the parent operation.
176 if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
177 !getEffectsBefore(op: parent, effects, stopAtBarrier))
178 return false;
179
180 // If the op is loop-like, collect effects from the trailing operations until
181 // we hit a barrier because they can executed before the current operation by
182 // the previous iteration of this loop. For example, in the following loop
183 //
184 // for i = ... {
185 // op1
186 // ...
187 // barrier
188 // op2
189 // }
190 //
191 // the operation `op2` at iteration `i` is known to be executed before the
192 // operation `op1` at iteration `i+1` and the side effects must be ordered
193 // appropriately.
194 if (isSequentialLoopLike(op: parent)) {
195 // Assuming loop terminators have no side effects.
196 return getEffectsBeforeInBlock(op: op->getBlock()->getTerminator(), effects,
197 /*stopAtBarrier=*/true);
198 }
199
200 // If the parent operation is not guaranteed to execute its (single-block)
201 // region once, walk the block.
202 bool conservative = false;
203 if (!hasSingleExecutionBody(op: op->getParentOp()))
204 op->getParentOp()->walk(callback: [&](Operation *in) {
205 if (conservative)
206 return WalkResult::interrupt();
207 if (!collectEffects(op: in, effects)) {
208 conservative = true;
209 return WalkResult::interrupt();
210 }
211 return WalkResult::advance();
212 });
213
214 return !conservative;
215}
216
217/// Get all effects after the given operation caused by other operations in the
218/// same block. That is, this will not consider operations beyond the block.
219static bool
220getEffectsAfterInBlock(Operation *op,
221 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
222 bool stopAtBarrier) {
223 if (op == &op->getBlock()->back())
224 return true;
225
226 for (Operation *it = op->getNextNode(); it != nullptr;
227 it = it->getNextNode()) {
228 if (isa<BarrierOp>(it)) {
229 if (stopAtBarrier)
230 return true;
231 continue;
232 }
233 if (!collectEffects(op: it, effects))
234 return false;
235 }
236 return true;
237}
238
239/// Collects memory effects from operations that may be executed after `op` in
240/// a trivial structured control flow, e.g., without branches. Stops at the
241/// parallel region boundary or at the barrier operation if `stopAtBarrier` is
242/// set. Returns `true` if the memory effects added to `effects` are exact,
243/// `false` if they are a conservative over-approximation. The latter means that
244/// `effects` contain instances not associated with a specific value.
245static bool
246getEffectsAfter(Operation *op,
247 SmallVectorImpl<MemoryEffects::EffectInstance> &effects,
248 bool stopAtBarrier) {
249 if (!op->getBlock())
250 return true;
251
252 // If there is a non-structured control flow, bail.
253 Region *region = op->getBlock()->getParent();
254 if (region && !llvm::hasSingleElement(C&: region->getBlocks())) {
255 addAllValuelessEffects(effects);
256 return false;
257 }
258
259 // Collect all effects after the op.
260 getEffectsAfterInBlock(op, effects, stopAtBarrier);
261
262 Operation *parent = op->getParentOp();
263 // Stop if reached the parallel region boundary.
264 if (isParallelRegionBoundary(op: parent))
265 return true;
266
267 // Otherwise, keep collecting below the parent operation.
268 // Don't look into, for example, neighboring functions
269 if (!parent->hasTrait<OpTrait::IsIsolatedFromAbove>() &&
270 !getEffectsAfter(op: parent, effects, stopAtBarrier))
271 return false;
272
273 // If the op is loop-like, collect effects from the leading operations until
274 // we hit a barrier because they can executed after the current operation by
275 // the next iteration of this loop. For example, in the following loop
276 //
277 // for i = ... {
278 // op1
279 // ...
280 // barrier
281 // op2
282 // }
283 //
284 // the operation `op1` at iteration `i` is known to be executed after the
285 // operation `op2` at iteration `i-1` and the side effects must be ordered
286 // appropriately.
287 if (isSequentialLoopLike(op: parent)) {
288 if (isa<BarrierOp>(op->getBlock()->front()))
289 return true;
290
291 bool exact = collectEffects(op: &op->getBlock()->front(), effects);
292 return getEffectsAfterInBlock(op: &op->getBlock()->front(), effects,
293 /*stopAtBarrier=*/true) &&
294 exact;
295 }
296
297 // If the parent operation is not guaranteed to execute its (single-block)
298 // region once, walk the block.
299 bool conservative = false;
300 if (!hasSingleExecutionBody(op: op->getParentOp()))
301 op->getParentOp()->walk(callback: [&](Operation *in) {
302 if (conservative)
303 return WalkResult::interrupt();
304 if (!collectEffects(op: in, effects)) {
305 conservative = true;
306 return WalkResult::interrupt();
307 }
308 return WalkResult::advance();
309 });
310
311 return !conservative;
312}
313
314/// Looks through known "view-like" ops to find the base memref.
315static Value getBase(Value v) {
316 while (true) {
317 Operation *definingOp = v.getDefiningOp();
318 if (!definingOp)
319 break;
320
321 bool shouldContinue =
322 TypeSwitch<Operation *, bool>(v.getDefiningOp())
323 .Case<memref::CastOp, memref::SubViewOp, memref::ViewOp>(
324 [&](auto op) {
325 v = op.getSource();
326 return true;
327 })
328 .Case<memref::TransposeOp>([&](auto op) {
329 v = op.getIn();
330 return true;
331 })
332 .Case<memref::CollapseShapeOp, memref::ExpandShapeOp>([&](auto op) {
333 v = op.getSrc();
334 return true;
335 })
336 .Default([](Operation *) { return false; });
337 if (!shouldContinue)
338 break;
339 }
340 return v;
341}
342
343/// Returns `true` if the value is defined as a function argument.
344static bool isFunctionArgument(Value v) {
345 auto arg = dyn_cast<BlockArgument>(Val&: v);
346 return arg && isa<FunctionOpInterface>(Val: arg.getOwner()->getParentOp());
347}
348
349/// Returns the operand that the operation "propagates" through it for capture
350/// purposes. That is, if the value produced by this operation is captured, then
351/// so is the returned value.
352static Value propagatesCapture(Operation *op) {
353 return llvm::TypeSwitch<Operation *, Value>(op)
354 .Case(
355 [](ViewLikeOpInterface viewLike) { return viewLike.getViewSource(); })
356 .Case([](CastOpInterface castLike) { return castLike->getOperand(0); })
357 .Case([](memref::TransposeOp transpose) { return transpose.getIn(); })
358 .Case<memref::ExpandShapeOp, memref::CollapseShapeOp>(
359 [](auto op) { return op.getSrc(); })
360 .Default([](Operation *) { return Value(); });
361}
362
363/// Returns `true` if the given operation is known to capture the given value,
364/// `false` if it is known not to capture the given value, `nullopt` if neither
365/// is known.
366static std::optional<bool> getKnownCapturingStatus(Operation *op, Value v) {
367 return llvm::TypeSwitch<Operation *, std::optional<bool>>(op)
368 // Store-like operations don't capture the destination, but do capture
369 // the value.
370 .Case<memref::StoreOp, vector::TransferWriteOp>(
371 [&](auto op) { return op.getValue() == v; })
372 .Case<vector::StoreOp, vector::MaskedStoreOp>(
373 [&](auto op) { return op.getValueToStore() == v; })
374 // These operations are known not to capture.
375 .Case([](memref::DeallocOp) { return false; })
376 // By default, we don't know anything.
377 .Default([](Operation *) { return std::nullopt; });
378}
379
380/// Returns `true` if the value may be captured by any of its users, i.e., if
381/// the user may be storing this value into memory. This makes aliasing analysis
382/// more conservative as it cannot assume the pointer-like value is only passed
383/// around through SSA use-def.
384static bool maybeCaptured(Value v) {
385 SmallVector<Value> todo = {v};
386 while (!todo.empty()) {
387 Value v = todo.pop_back_val();
388 for (Operation *user : v.getUsers()) {
389 // A user that is known to only read cannot capture.
390 auto iface = dyn_cast<MemoryEffectOpInterface>(user);
391 if (iface) {
392 SmallVector<MemoryEffects::EffectInstance> effects;
393 iface.getEffects(effects);
394 if (llvm::all_of(Range&: effects,
395 P: [](const MemoryEffects::EffectInstance &effect) {
396 return isa<MemoryEffects::Read>(Val: effect.getEffect());
397 })) {
398 continue;
399 }
400 }
401
402 // When an operation is known to create an alias, consider if the
403 // source is captured as well.
404 if (Value v = propagatesCapture(op: user)) {
405 todo.push_back(Elt: v);
406 continue;
407 }
408
409 std::optional<bool> knownCaptureStatus = getKnownCapturingStatus(op: user, v);
410 if (!knownCaptureStatus || *knownCaptureStatus)
411 return true;
412 }
413 }
414
415 return false;
416}
417
418/// Returns true if two values may be referencing aliasing memory. This is a
419/// rather naive and conservative analysis. Values defined by different
420/// allocation-like operations as well as values derived from those by casts and
421/// views cannot alias each other. Similarly, values defined by allocations
422/// inside a function cannot alias function arguments. Global values cannot
423/// alias each other or local allocations. Values that are captured, i.e.
424/// themselves potentially stored in memory, are considered as aliasing with
425/// everything. This seems sufficient to achieve barrier removal in structured
426/// control flow, more complex cases would require a proper dataflow analysis.
427static bool mayAlias(Value first, Value second) {
428 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
429 DBGS_ALIAS() << "checking aliasing between ";
430 DBGS_ALIAS() << first << "\n";
431 DBGS_ALIAS() << " and ";
432 DBGS_ALIAS() << second << "\n";
433 });
434
435 first = getBase(v: first);
436 second = getBase(v: second);
437
438 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, {
439 DBGS_ALIAS() << "base ";
440 DBGS_ALIAS() << first << "\n";
441 DBGS_ALIAS() << " and ";
442 DBGS_ALIAS() << second << "\n";
443 });
444
445 // Values derived from the same base memref do alias (unless we do a more
446 // advanced analysis to prove non-overlapping accesses).
447 if (first == second) {
448 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> do alias!\n");
449 return true;
450 }
451
452 // Different globals cannot alias.
453 if (auto globFirst = first.getDefiningOp<memref::GetGlobalOp>()) {
454 if (auto globSecond = second.getDefiningOp<memref::GetGlobalOp>()) {
455 return globFirst.getNameAttr() == globSecond.getNameAttr();
456 }
457 }
458
459 // Two function arguments marked as noalias do not alias.
460 auto isNoaliasFuncArgument = [](Value value) {
461 auto bbArg = dyn_cast<BlockArgument>(Val&: value);
462 if (!bbArg)
463 return false;
464 auto iface = dyn_cast<FunctionOpInterface>(bbArg.getOwner()->getParentOp());
465 if (!iface)
466 return false;
467 // TODO: we need a way to not depend on the LLVM dialect here.
468 return iface.getArgAttr(bbArg.getArgNumber(), "llvm.noalias") != nullptr;
469 };
470 if (isNoaliasFuncArgument(first) && isNoaliasFuncArgument(second))
471 return false;
472
473 bool isDistinct[] = {producesDistinctBase(op: first.getDefiningOp()),
474 producesDistinctBase(op: second.getDefiningOp())};
475 bool isGlobal[] = {first.getDefiningOp<memref::GetGlobalOp>() != nullptr,
476 second.getDefiningOp<memref::GetGlobalOp>() != nullptr};
477
478 // Non-equivalent distinct bases and globals cannot alias. At this point, we
479 // have already filtered out based on values being equal and global name being
480 // equal.
481 if ((isDistinct[0] || isGlobal[0]) && (isDistinct[1] || isGlobal[1]))
482 return false;
483
484 bool isArg[] = {isFunctionArgument(v: first), isFunctionArgument(v: second)};
485
486 // Distinct bases (allocations) cannot have been passed as an argument.
487 if ((isDistinct[0] && isArg[1]) || (isDistinct[1] && isArg[0]))
488 return false;
489
490 // Non-captured base distinct values cannot conflict with another base value.
491 if (isDistinct[0] && !maybeCaptured(v: first))
492 return false;
493 if (isDistinct[1] && !maybeCaptured(v: second))
494 return false;
495
496 // Otherwise, conservatively assume aliasing.
497 DEBUG_WITH_TYPE(DEBUG_TYPE_ALIAS, DBGS_ALIAS() << "-> may alias!\n");
498 return true;
499}
500
501/// Returns `true` if the effect may be affecting memory aliasing the value. If
502/// the effect is not associated with any value, it is assumed to affect all
503/// memory and therefore aliases with everything.
504static bool mayAlias(MemoryEffects::EffectInstance a, Value v2) {
505 if (Value v = a.getValue()) {
506 return mayAlias(first: v, second: v2);
507 }
508 return true;
509}
510
511/// Returns `true` if the two effects may be affecting aliasing memory. If
512/// an effect is not associated with any value, it is assumed to affect all
513/// memory and therefore aliases with everything. Effects on different resources
514/// cannot alias.
515static bool mayAlias(MemoryEffects::EffectInstance a,
516 MemoryEffects::EffectInstance b) {
517 if (a.getResource()->getResourceID() != b.getResource()->getResourceID())
518 return false;
519 if (Value v2 = b.getValue()) {
520 return mayAlias(a, v2);
521 } else if (Value v = a.getValue()) {
522 return mayAlias(a: b, v2: v);
523 }
524 return true;
525}
526
527/// Returns `true` if any of the "before" effect instances has a conflict with
528/// any "after" instance for the purpose of barrier elimination. The effects are
529/// supposed to be limited to a barrier synchronization scope. A conflict exists
530/// if effects instances affect aliasing memory locations and at least on of
531/// then as a write. As an exception, if the non-write effect is an allocation
532/// effect, there is no conflict since we are only expected to see the
533/// allocation happening in the same thread and it cannot be accessed from
534/// another thread without capture (which we do handle in alias analysis).
535static bool
536haveConflictingEffects(ArrayRef<MemoryEffects::EffectInstance> beforeEffects,
537 ArrayRef<MemoryEffects::EffectInstance> afterEffects) {
538 for (const MemoryEffects::EffectInstance &before : beforeEffects) {
539 for (const MemoryEffects::EffectInstance &after : afterEffects) {
540 // If cannot alias, definitely no conflict.
541 if (!mayAlias(a: before, b: after))
542 continue;
543
544 // Read/read is not a conflict.
545 if (isa<MemoryEffects::Read>(Val: before.getEffect()) &&
546 isa<MemoryEffects::Read>(Val: after.getEffect())) {
547 continue;
548 }
549
550 // Allocate/* is not a conflict since the allocation happens within the
551 // thread context.
552 // TODO: This is not the case for */Free unless the allocation happened in
553 // the thread context, which we could also check for.
554 if (isa<MemoryEffects::Allocate>(Val: before.getEffect()) ||
555 isa<MemoryEffects::Allocate>(Val: after.getEffect())) {
556 continue;
557 }
558
559 // In the particular case that the before effect is a free, we only have 2
560 // possibilities:
561 // 1. either the program is well-formed and there must be an interleaved
562 // alloc that must limit the scope of effect lookback and we can
563 // safely ignore the free -> read / free -> write and free -> free
564 // conflicts.
565 // 2. either the program is ill-formed and we are in undefined behavior
566 // territory.
567 if (isa<MemoryEffects::Free>(Val: before.getEffect()))
568 continue;
569
570 // Other kinds of effects create a conflict, e.g. read-after-write.
571 LLVM_DEBUG(
572 DBGS() << "found a conflict between (before): " << before.getValue()
573 << " read:" << isa<MemoryEffects::Read>(before.getEffect())
574 << " write:" << isa<MemoryEffects::Write>(before.getEffect())
575 << " alloc:"
576 << isa<MemoryEffects::Allocate>(before.getEffect()) << " free:"
577 << isa<MemoryEffects::Free>(before.getEffect()) << "\n");
578 LLVM_DEBUG(
579 DBGS() << "and (after): " << after.getValue()
580 << " read:" << isa<MemoryEffects::Read>(after.getEffect())
581 << " write:" << isa<MemoryEffects::Write>(after.getEffect())
582 << " alloc:" << isa<MemoryEffects::Allocate>(after.getEffect())
583 << " free:" << isa<MemoryEffects::Free>(after.getEffect())
584 << "\n");
585 return true;
586 }
587 }
588
589 return false;
590}
591
592namespace {
593class BarrierElimination final : public OpRewritePattern<BarrierOp> {
594public:
595 using OpRewritePattern<BarrierOp>::OpRewritePattern;
596
597 LogicalResult matchAndRewrite(BarrierOp barrier,
598 PatternRewriter &rewriter) const override {
599 LLVM_DEBUG(DBGS() << "checking the necessity of: " << barrier << " "
600 << barrier.getLoc() << "\n");
601
602 SmallVector<MemoryEffects::EffectInstance> beforeEffects;
603 getEffectsBefore(barrier, beforeEffects, /*stopAtBarrier=*/true);
604
605 SmallVector<MemoryEffects::EffectInstance> afterEffects;
606 getEffectsAfter(barrier, afterEffects, /*stopAtBarrier=*/true);
607
608 if (!haveConflictingEffects(beforeEffects, afterEffects)) {
609 LLVM_DEBUG(DBGS() << "the surrounding barriers are sufficient, removing "
610 << barrier << "\n");
611 rewriter.eraseOp(op: barrier);
612 return success();
613 }
614
615 LLVM_DEBUG(DBGS() << "barrier is necessary: " << barrier << " "
616 << barrier.getLoc() << "\n");
617 return failure();
618 }
619};
620
621class GpuEliminateBarriersPass
622 : public impl::GpuEliminateBarriersBase<GpuEliminateBarriersPass> {
623 void runOnOperation() override {
624 auto funcOp = getOperation();
625 RewritePatternSet patterns(&getContext());
626 mlir::populateGpuEliminateBarriersPatterns(patterns);
627 if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
628 return signalPassFailure();
629 }
630 }
631};
632
633} // namespace
634
635void mlir::populateGpuEliminateBarriersPatterns(RewritePatternSet &patterns) {
636 patterns.insert<BarrierElimination>(arg: patterns.getContext());
637}
638

// Source file: mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp