//===- GenericLoopConversion.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Support/OpenMP-utils.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
#include "mlir/IR/IRMapping.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Transforms/DialectConversion.h"

#include <memory>
#include <optional>
#include <type_traits>

namespace flangomp {
#define GEN_PASS_DEF_GENERICLOOPCONVERSIONPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp

namespace {

/// A conversion pattern to handle various combined forms of `omp.loop`. For
/// how combined/composite directives are handled, see:
/// https://discourse.llvm.org/t/rfc-representing-combined-composite-constructs-in-the-openmp-dialect/76986.
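///
/// As a rough summary of the conversions implemented below (illustrative, not
/// exhaustive; clauses are omitted):
/// ```
/// omp.loop (no bind or bind(thread))  --> omp.simd
/// omp.loop bind(parallel)             --> omp.wsloop
/// omp.loop bind(teams)                --> omp.distribute
/// omp.parallel { omp.loop }           --> omp.parallel { omp.wsloop }
/// omp.teams { omp.loop }              --> omp.teams { omp.parallel {
///                                           omp.distribute { omp.wsloop }}}
///                                         or omp.teams { omp.distribute }
///                                         when the loop cannot be mapped to
///                                         a worksharing loop
/// ```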
class GenericLoopConversionPattern
    : public mlir::OpConversionPattern<mlir::omp::LoopOp> {
public:
  enum class GenericLoopCombinedInfo { Standalone, TeamsLoop, ParallelLoop };

  using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;

  explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
      : mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
    // Enable rewrite recursion to make sure nested `loop` directives are
    // handled.
    this->setHasBoundedRewriteRecursion(true);
  }

  mlir::LogicalResult
  matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const override {
    assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));

    GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);

    switch (combinedInfo) {
    case GenericLoopCombinedInfo::Standalone:
      rewriteStandaloneLoop(loopOp, rewriter);
      break;
    case GenericLoopCombinedInfo::ParallelLoop:
      rewriteToWsloop(loopOp, rewriter);
      break;
    case GenericLoopCombinedInfo::TeamsLoop:
      if (teamsLoopCanBeParallelFor(loopOp)) {
        rewriteToDistributeParallelDo(loopOp, rewriter);
      } else {
        auto teamsOp = llvm::cast<mlir::omp::TeamsOp>(loopOp->getParentOp());
        auto teamsBlockArgIface =
            llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*teamsOp);
        auto loopBlockArgIface =
            llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);

        // The `loop` op's reductions are expected to have been hoisted to the
        // parent `teams` op already (see `ReductionsHoistingPattern`). Since
        // the `loop` op will be rewritten to `distribute`, which cannot carry
        // reductions, redirect all uses of the loop's reduction block args to
        // the corresponding `teams` block args.
        for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
             ++i) {
          mlir::BlockArgument loopRedBlockArg =
              loopBlockArgIface.getReductionBlockArgs()[i];
          mlir::BlockArgument teamsRedBlockArg =
              teamsBlockArgIface.getReductionBlockArgs()[i];
          rewriter.replaceAllUsesWith(loopRedBlockArg, teamsRedBlockArg);
        }

        // Erase the now-unused reduction block args of the `loop` op and drop
        // its reduction clause entirely.
        for (unsigned i = 0; i < loopBlockArgIface.numReductionBlockArgs();
             ++i) {
          loopOp.getRegion().eraseArgument(
              loopBlockArgIface.getReductionBlockArgsStart());
        }

        loopOp.removeReductionModAttr();
        loopOp.getReductionVarsMutable().clear();
        loopOp.removeReductionByrefAttr();
        loopOp.removeReductionSymsAttr();

        rewriteToDistribute(loopOp, rewriter);
      }

      break;
    }

    rewriter.eraseOp(loopOp);
    return mlir::success();
  }

  static mlir::LogicalResult
  checkLoopConversionSupportStatus(mlir::omp::LoopOp loopOp) {
    auto todo = [&loopOp](mlir::StringRef clauseName) {
      return loopOp.emitError()
             << "not yet implemented: Unhandled clause " << clauseName << " in "
             << loopOp->getName() << " operation";
    };

    if (loopOp.getOrder())
      return todo("order");

    return mlir::success();
  }

private:
  static GenericLoopCombinedInfo
  findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
    mlir::Operation *parentOp = loopOp->getParentOp();
    GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;

    if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
      result = GenericLoopCombinedInfo::TeamsLoop;

    if (auto parallelOp =
            mlir::dyn_cast_if_present<mlir::omp::ParallelOp>(parentOp))
      result = GenericLoopCombinedInfo::ParallelLoop;

    return result;
  }

  /// Checks whether a `teams loop` construct can be rewritten to `teams
  /// distribute parallel do` or whether it has to be converted to `teams
  /// distribute`.
  ///
  /// This checks constraints similar to those checked by `TeamsLoopChecker` in
  /// SemaOpenMP.cpp in clang.
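  ///
  /// For illustration only (a rough sketch; names and exact syntax are not
  /// taken from real input), the outer `loop` below cannot be mapped to
  /// `distribute parallel do` because it nests a standalone `loop` that binds
  /// to `parallel` and would itself become a worksharing loop:
  /// ```
  /// omp.teams {
  ///   omp.loop {
  ///     omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step) {
  ///       omp.loop bind(parallel) {
  ///         ...
  ///       }
  ///       omp.yield
  ///     }
  ///   }
  ///   omp.terminator
  /// }
  /// ```
  /// Calls to functions whose names do not start with "omp_" have the same
  /// effect, since the callees might contain nested parallelism.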
  static bool teamsLoopCanBeParallelFor(mlir::omp::LoopOp loopOp) {
    bool canBeParallelFor =
        !loopOp
             .walk<mlir::WalkOrder::PreOrder>([&](mlir::Operation *nestedOp) {
               if (nestedOp == loopOp)
                 return mlir::WalkResult::advance();

               if (auto nestedLoopOp =
                       mlir::dyn_cast<mlir::omp::LoopOp>(nestedOp)) {
                 GenericLoopCombinedInfo combinedInfo =
                     findGenericLoopCombineInfo(nestedLoopOp);

                 // Worksharing loops cannot be nested inside each other.
                 // Therefore, if the current `loop` directive nests another
                 // `loop` whose `bind` modifier is `parallel`, this `loop`
                 // directive cannot be mapped to `distribute parallel for`
                 // but rather only to `distribute`.
                 if (combinedInfo == GenericLoopCombinedInfo::Standalone &&
                     nestedLoopOp.getBindKind() &&
                     *nestedLoopOp.getBindKind() ==
                         mlir::omp::ClauseBindKind::Parallel)
                   return mlir::WalkResult::interrupt();

                 if (combinedInfo == GenericLoopCombinedInfo::ParallelLoop)
                   return mlir::WalkResult::interrupt();

               } else if (auto callOp =
                              mlir::dyn_cast<mlir::CallOpInterface>(nestedOp)) {
                 // Calls to non-OpenMP API runtime functions inhibit
                 // transformation to `teams distribute parallel do` since the
                 // called functions might have nested parallelism themselves.
                 bool isOpenMPAPI = false;
                 mlir::CallInterfaceCallable callable =
                     callOp.getCallableForCallee();

                 if (auto callableSymRef =
                         mlir::dyn_cast<mlir::SymbolRefAttr>(callable))
                   isOpenMPAPI =
                       callableSymRef.getRootReference().strref().starts_with(
                           "omp_");

                 if (!isOpenMPAPI)
                   return mlir::WalkResult::interrupt();
               }

               return mlir::WalkResult::advance();
             })
             .wasInterrupted();

    return canBeParallelFor;
  }

  void rewriteStandaloneLoop(mlir::omp::LoopOp loopOp,
                             mlir::ConversionPatternRewriter &rewriter) const {
    using namespace mlir::omp;
    std::optional<ClauseBindKind> bindKind = loopOp.getBindKind();

    if (!bindKind.has_value())
      return rewriteToSimdLoop(loopOp, rewriter);

    switch (*loopOp.getBindKind()) {
    case ClauseBindKind::Parallel:
      return rewriteToWsloop(loopOp, rewriter);
    case ClauseBindKind::Teams:
      return rewriteToDistribute(loopOp, rewriter);
    case ClauseBindKind::Thread:
      return rewriteToSimdLoop(loopOp, rewriter);
    }
  }

  /// Rewrites standalone `loop` directives (those without a `bind` clause or
  /// with `bind(thread)`) to equivalent `simd` constructs.
  ///
  /// The reasoning behind this decision is that according to the spec (version
  /// 5.2, section 11.7.1):
  ///
  /// "If the bind clause is not specified on a construct for which it may be
  /// specified and the construct is closely nested inside a teams or parallel
  /// construct, the effect is as if binding is teams or parallel. If none of
  /// those conditions hold, the binding region is not defined."
  ///
  /// which means that standalone `loop` directives have an undefined binding
  /// region. Moreover, the spec says (in the next paragraph):
  ///
  /// "The specified binding region determines the binding thread set.
  /// Specifically, if the binding region is a teams region, then the binding
  /// thread set is the set of initial threads that are executing that region
  /// while if the binding region is a parallel region, then the binding thread
  /// set is the team of threads that are executing that region. If the binding
  /// region is not defined, then the binding thread set is the encountering
  /// thread."
  ///
  /// which means that the binding thread set for a standalone `loop` directive
  /// is only the encountering thread.
  ///
  /// Since the encountering thread is the binding thread (set) for a
  /// standalone `loop` directive, the best we can do in such a case is to
  /// "simd" the directive.
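  ///
  /// As a rough sketch (operand names and types below are illustrative), this
  /// turns:
  /// ```
  /// omp.loop private(@x_priv %x -> %arg0 : !fir.ref<i32>) {
  ///   omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  ///     ...
  ///     omp.yield
  ///   }
  /// }
  /// ```
  /// into:
  /// ```
  /// omp.simd private(@x_priv %x -> %arg0 : !fir.ref<i32>) {
  ///   omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) {
  ///     ...
  ///     omp.yield
  ///   }
  /// }
  /// ```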
  void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
                         mlir::ConversionPatternRewriter &rewriter) const {
    loopOp.emitWarning(
        "Detected standalone OpenMP `loop` directive with thread binding, "
        "the associated loop will be rewritten to `simd`.");
    rewriteToSingleWrapperOp<mlir::omp::SimdOp, mlir::omp::SimdOperands>(
        loopOp, rewriter);
  }

  void rewriteToDistribute(mlir::omp::LoopOp loopOp,
                           mlir::ConversionPatternRewriter &rewriter) const {
    assert(loopOp.getReductionVars().empty());
    rewriteToSingleWrapperOp<mlir::omp::DistributeOp,
                             mlir::omp::DistributeOperands>(loopOp, rewriter);
  }

  void rewriteToWsloop(mlir::omp::LoopOp loopOp,
                       mlir::ConversionPatternRewriter &rewriter) const {
    rewriteToSingleWrapperOp<mlir::omp::WsloopOp, mlir::omp::WsloopOperands>(
        loopOp, rewriter);
  }

  // TODO Suggestion by Sergio: tag auto-generated operations for constructs
  // that weren't part of the original program; that would be useful
  // information for debugging purposes later on. This new attribute could be
  // used for `omp.loop`, but also for `do concurrent` transformations,
  // `workshare`, `workdistribute`, etc. The tag could be attached to all kinds
  // of auto-generated operations using a dialect attribute (named something
  // like `omp.origin` or `omp.derived`) and could hold the name of the
  // operation it was derived from, the reason it was transformed, or similar
  // information we could use when emitting any messages related to it later
  // on.
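  /// Rewrites `loopOp` into a single loop wrapper op of type `OpTy` (one of
  /// `omp.simd`, `omp.distribute`, or `omp.wsloop`): the `private` clause
  /// (and, for all wrappers except `distribute`, the `reduction` clause) is
  /// forwarded to the new wrapper, and the nested `omp.loop_nest` is cloned
  /// into the wrapper's entry block.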
  template <typename OpTy, typename OpOperandsTy>
  void
  rewriteToSingleWrapperOp(mlir::omp::LoopOp loopOp,
                           mlir::ConversionPatternRewriter &rewriter) const {
    OpOperandsTy clauseOps;
    clauseOps.privateVars = loopOp.getPrivateVars();

    auto privateSyms = loopOp.getPrivateSyms();
    if (privateSyms)
      clauseOps.privateSyms.assign(privateSyms->begin(), privateSyms->end());

    Fortran::common::openmp::EntryBlockArgs args;
    args.priv.vars = clauseOps.privateVars;

    if constexpr (!std::is_same_v<OpOperandsTy,
                                  mlir::omp::DistributeOperands>) {
      populateReductionClauseOps(loopOp, clauseOps);
      args.reduction.vars = clauseOps.reductionVars;
    }

    auto wrapperOp = rewriter.create<OpTy>(loopOp.getLoc(), clauseOps);
    mlir::Block *opBlock = genEntryBlock(rewriter, args, wrapperOp.getRegion());

    mlir::IRMapping mapper;
    mlir::Block &loopBlock = *loopOp.getRegion().begin();

    for (auto [loopOpArg, opArg] :
         llvm::zip_equal(loopBlock.getArguments(), opBlock->getArguments()))
      mapper.map(loopOpArg, opArg);

    rewriter.clone(*loopOp.begin(), mapper);
  }

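  /// Rewrites a `teams`-nested `loop` op to the composite `distribute parallel
  /// do` form: an `omp.parallel` op (carrying the `loop` op's `private`
  /// clauses) containing an `omp.distribute` wrapper around an `omp.wsloop`
  /// wrapper (carrying the `reduction` clauses), with the original
  /// `omp.loop_nest` cloned inside. As a rough sketch (names are illustrative),
  /// the result nested under the parent `omp.teams` op looks like:
  /// ```
  /// omp.teams {
  ///   omp.parallel private(...) {
  ///     omp.distribute {
  ///       omp.wsloop reduction(...) {
  ///         omp.loop_nest ... {
  ///           ...
  ///         }
  ///       } {omp.composite}
  ///     } {omp.composite}
  ///     omp.terminator
  ///   } {omp.composite}
  ///   omp.terminator
  /// }
  /// ```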
  void rewriteToDistributeParallelDo(
      mlir::omp::LoopOp loopOp,
      mlir::ConversionPatternRewriter &rewriter) const {
    mlir::omp::ParallelOperands parallelClauseOps;
    parallelClauseOps.privateVars = loopOp.getPrivateVars();

    auto privateSyms = loopOp.getPrivateSyms();
    if (privateSyms)
      parallelClauseOps.privateSyms.assign(privateSyms->begin(),
                                           privateSyms->end());

    Fortran::common::openmp::EntryBlockArgs parallelArgs;
    parallelArgs.priv.vars = parallelClauseOps.privateVars;

    auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loopOp.getLoc(),
                                                             parallelClauseOps);
    genEntryBlock(rewriter, parallelArgs, parallelOp.getRegion());
    parallelOp.setComposite(true);
    rewriter.setInsertionPoint(
        rewriter.create<mlir::omp::TerminatorOp>(loopOp.getLoc()));

    mlir::omp::DistributeOperands distributeClauseOps;
    auto distributeOp = rewriter.create<mlir::omp::DistributeOp>(
        loopOp.getLoc(), distributeClauseOps);
    distributeOp.setComposite(true);
    rewriter.createBlock(&distributeOp.getRegion());

    mlir::omp::WsloopOperands wsloopClauseOps;
    populateReductionClauseOps(loopOp, wsloopClauseOps);
    Fortran::common::openmp::EntryBlockArgs wsloopArgs;
    wsloopArgs.reduction.vars = wsloopClauseOps.reductionVars;

    auto wsloopOp =
        rewriter.create<mlir::omp::WsloopOp>(loopOp.getLoc(), wsloopClauseOps);
    wsloopOp.setComposite(true);
    genEntryBlock(rewriter, wsloopArgs, wsloopOp.getRegion());

    mlir::IRMapping mapper;

    auto loopBlockInterface =
        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*loopOp);
    auto parallelBlockInterface =
        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*parallelOp);
    auto wsloopBlockInterface =
        llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*wsloopOp);

    for (auto [loopOpArg, parallelOpArg] :
         llvm::zip_equal(loopBlockInterface.getPrivateBlockArgs(),
                         parallelBlockInterface.getPrivateBlockArgs()))
      mapper.map(loopOpArg, parallelOpArg);

    for (auto [loopOpArg, wsloopOpArg] :
         llvm::zip_equal(loopBlockInterface.getReductionBlockArgs(),
                         wsloopBlockInterface.getReductionBlockArgs()))
      mapper.map(loopOpArg, wsloopOpArg);

    rewriter.clone(*loopOp.begin(), mapper);
  }

  void
  populateReductionClauseOps(mlir::omp::LoopOp loopOp,
                             mlir::omp::ReductionClauseOps &clauseOps) const {
    clauseOps.reductionMod = loopOp.getReductionModAttr();
    clauseOps.reductionVars = loopOp.getReductionVars();

    std::optional<mlir::ArrayAttr> reductionSyms = loopOp.getReductionSyms();
    if (reductionSyms)
      clauseOps.reductionSyms.assign(reductionSyms->begin(),
                                     reductionSyms->end());

    std::optional<llvm::ArrayRef<bool>> reductionByref =
        loopOp.getReductionByref();
    if (reductionByref)
      clauseOps.reductionByref.assign(reductionByref->begin(),
                                      reductionByref->end());
  }
};

/// According to the spec (v5.2, p340, 36):
///
/// ```
/// The effect of the reduction clause is as if it is applied to all leaf
/// constructs that permit the clause, except for the following constructs:
/// * ....
/// * The teams construct, when combined with the loop construct.
/// ```
///
/// Therefore, for a combined directive similar to: `!$omp teams loop
/// reduction(...)`, the earlier stages of the compiler assign the `reduction`
/// clauses only to the `loop` leaf and not to the `teams` leaf.
///
/// On the other hand, if we have a combined construct similar to: `!$omp teams
/// distribute parallel do`, the `reduction` clauses are assigned both to the
/// `teams` and the `do` leaves. We need to match this behavior when we convert
/// a `teams` op with a nested `loop` op since the target set of constructs/ops
/// will be incorrect without moving the reductions up to the `teams` op as
/// well.
///
/// This pattern does exactly this. Given the following input:
/// ```
/// omp.teams {
///   omp.loop reduction(@red_sym %red_op -> %red_arg : !fir.ref<i32>) {
///     omp.loop_nest ... {
///       ...
///     }
///   }
/// }
/// ```
/// this pattern updates the `omp.teams` op in-place to:
/// ```
/// omp.teams reduction(@red_sym %red_op -> %teams_red_arg : !fir.ref<i32>) {
///   omp.loop reduction(@red_sym %teams_red_arg -> %red_arg : !fir.ref<i32>) {
///     omp.loop_nest ... {
///       ...
///     }
///   }
/// }
/// ```
///
/// Note the following:
/// * The nested `omp.loop` is not rewritten by this pattern; that happens
///   through `GenericLoopConversionPattern`.
/// * The reduction info is cloned from the nested `omp.loop` op to the parent
///   `omp.teams` op.
/// * The reduction operand of the `omp.loop` op is updated to be the **new**
///   reduction block argument of the `omp.teams` op.
class ReductionsHoistingPattern
    : public mlir::OpConversionPattern<mlir::omp::TeamsOp> {
public:
  using mlir::OpConversionPattern<mlir::omp::TeamsOp>::OpConversionPattern;

  static mlir::omp::LoopOp
  tryToFindNestedLoopWithReduction(mlir::omp::TeamsOp teamsOp) {
    if (teamsOp.getRegion().getBlocks().size() != 1)
      return nullptr;

    mlir::Block &teamsBlock = *teamsOp.getRegion().begin();
    auto loopOpIter = llvm::find_if(teamsBlock, [](mlir::Operation &op) {
      auto nestedLoopOp = llvm::dyn_cast<mlir::omp::LoopOp>(&op);

      if (!nestedLoopOp)
        return false;

      return !nestedLoopOp.getReductionVars().empty();
    });

    if (loopOpIter == teamsBlock.end())
      return nullptr;

    // TODO: return an error if more than one `loop` op is nested. We would
    // need to coalesce the reductions in that case.
    return llvm::cast<mlir::omp::LoopOp>(*loopOpIter);
  }

  mlir::LogicalResult
  matchAndRewrite(mlir::omp::TeamsOp teamsOp, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const override {
    mlir::omp::LoopOp nestedLoopOp = tryToFindNestedLoopWithReduction(teamsOp);

    rewriter.modifyOpInPlace(teamsOp, [&]() {
      teamsOp.setReductionMod(nestedLoopOp.getReductionMod());
      teamsOp.getReductionVarsMutable().assign(nestedLoopOp.getReductionVars());
      teamsOp.setReductionByref(nestedLoopOp.getReductionByref());
      teamsOp.setReductionSymsAttr(nestedLoopOp.getReductionSymsAttr());

      auto blockArgIface =
          llvm::cast<mlir::omp::BlockArgOpenMPOpInterface>(*teamsOp);
      unsigned reductionArgsStart = blockArgIface.getPrivateBlockArgsStart() +
                                    blockArgIface.numPrivateBlockArgs();
      llvm::SmallVector<mlir::Value> newLoopOpReductionOperands;

      for (auto [idx, reductionVar] :
           llvm::enumerate(nestedLoopOp.getReductionVars())) {
        mlir::BlockArgument newTeamsOpReductionBlockArg =
            teamsOp.getRegion().insertArgument(reductionArgsStart + idx,
                                               reductionVar.getType(),
                                               reductionVar.getLoc());
        newLoopOpReductionOperands.push_back(newTeamsOpReductionBlockArg);
      }

      nestedLoopOp.getReductionVarsMutable().assign(newLoopOpReductionOperands);
    });

    return mlir::success();
  }
};

class GenericLoopConversionPass
    : public flangomp::impl::GenericLoopConversionPassBase<
          GenericLoopConversionPass> {
public:
  GenericLoopConversionPass() = default;

  void runOnOperation() override {
    mlir::func::FuncOp func = getOperation();

    if (func.isDeclaration())
      return;

    mlir::MLIRContext *context = &getContext();
    mlir::RewritePatternSet patterns(context);
    patterns.insert<ReductionsHoistingPattern, GenericLoopConversionPattern>(
        context);
    mlir::ConversionTarget target(*context);

    target.markUnknownOpDynamicallyLegal(
        [](mlir::Operation *) { return true; });

    target.addDynamicallyLegalOp<mlir::omp::TeamsOp>(
        [](mlir::omp::TeamsOp teamsOp) {
          // If teamsOp's reductions are already populated, then the op is
          // legal. Additionally, the op is legal if it does not nest a LoopOp
          // with reductions.
          return !teamsOp.getReductionVars().empty() ||
                 ReductionsHoistingPattern::tryToFindNestedLoopWithReduction(
                     teamsOp) == nullptr;
        });

    target.addDynamicallyLegalOp<mlir::omp::LoopOp>(
        [](mlir::omp::LoopOp loopOp) {
          return mlir::failed(
              GenericLoopConversionPattern::checkLoopConversionSupportStatus(
                  loopOp));
        });

    if (mlir::failed(mlir::applyFullConversion(getOperation(), target,
                                               std::move(patterns)))) {
      mlir::emitError(func.getLoc(), "error in converting `omp.loop` op");
      signalPassFailure();
    }
  }
};
} // namespace