1//===- DoConcurrentConversion.cpp -- map `DO CONCURRENT` to OpenMP loops --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "flang/Optimizer/Builder/FIRBuilder.h"
10#include "flang/Optimizer/Builder/Todo.h"
11#include "flang/Optimizer/Dialect/FIROps.h"
12#include "flang/Optimizer/OpenMP/Passes.h"
13#include "flang/Optimizer/OpenMP/Utils.h"
14#include "flang/Support/OpenMP-utils.h"
15#include "mlir/Analysis/SliceAnalysis.h"
16#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
17#include "mlir/IR/IRMapping.h"
18#include "mlir/Transforms/DialectConversion.h"
19#include "mlir/Transforms/RegionUtils.h"
20
21namespace flangomp {
22#define GEN_PASS_DEF_DOCONCURRENTCONVERSIONPASS
23#include "flang/Optimizer/OpenMP/Passes.h.inc"
24} // namespace flangomp
25
26#define DEBUG_TYPE "do-concurrent-conversion"
27#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE << "]: ")
28
29namespace {
30namespace looputils {
31/// Stores info needed about the induction/iteration variable for each `do
32/// concurrent` in a loop nest.
33struct InductionVariableInfo {
34 InductionVariableInfo(fir::DoConcurrentLoopOp loop,
35 mlir::Value inductionVar) {
36 populateInfo(loop, inductionVar);
37 }
38 /// The operation allocating memory for iteration variable.
39 mlir::Operation *iterVarMemDef;
40 /// the operation(s) updating the iteration variable with the current
41 /// iteration number.
42 llvm::SmallVector<mlir::Operation *, 2> indVarUpdateOps;
43
44private:
45 /// For the \p doLoop parameter, find the following:
46 ///
47 /// 1. The operation that declares its iteration variable or allocates memory
48 /// for it. For example, give the following loop:
49 /// ```
50 /// ...
51 /// %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
52 /// ...
53 /// fir.do_concurrent.loop (%ind_var) = (%lb) to (%ub) step (%s) {
54 /// %ind_var_conv = fir.convert %ind_var : (index) -> i32
55 /// fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
56 /// ...
57 /// }
58 /// ```
59 ///
60 /// This function sets the `iterVarMemDef` member to the `hlfir.declare` op
61 /// for `%i`.
62 ///
63 /// 2. The operation(s) that update the loop's iteration variable from its
64 /// induction variable. For the above example, the `indVarUpdateOps` is
65 /// populated with the first 2 ops in the loop's body.
66 ///
67 /// Note: The current implementation is dependent on how flang emits loop
68 /// bodies; which is sufficient for the current simple test/use cases. If this
69 /// proves to be insufficient, this should be made more generic.
70 void populateInfo(fir::DoConcurrentLoopOp loop, mlir::Value inductionVar) {
71 mlir::Value result = nullptr;
72
73 // Checks if a StoreOp is updating the memref of the loop's iteration
74 // variable.
75 auto isStoringIV = [&](fir::StoreOp storeOp) {
76 // Direct store into the IV memref.
77 if (storeOp.getValue() == inductionVar) {
78 indVarUpdateOps.push_back(storeOp);
79 return true;
80 }
81
82 // Indirect store into the IV memref.
83 if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
84 storeOp.getValue().getDefiningOp())) {
85 if (convertOp.getOperand() == inductionVar) {
86 indVarUpdateOps.push_back(convertOp);
87 indVarUpdateOps.push_back(storeOp);
88 return true;
89 }
90 }
91
92 return false;
93 };
94
95 for (mlir::Operation &op : loop) {
96 if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
97 if (isStoringIV(storeOp)) {
98 result = storeOp.getMemref();
99 break;
100 }
101 }
102
103 assert(result != nullptr && result.getDefiningOp() != nullptr);
104 iterVarMemDef = result.getDefiningOp();
105 }
106};
107
108using InductionVariableInfos = llvm::SmallVector<InductionVariableInfo>;
109
110/// Collects values that are local to a loop: "loop-local values". A loop-local
111/// value is one that is used exclusively inside the loop but allocated outside
112/// of it. This usually corresponds to temporary values that are used inside the
113/// loop body for initialzing other variables for example.
114///
115/// See `flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90` for an
116/// example of why we need this.
117///
118/// \param [in] doLoop - the loop within which the function searches for values
119/// used exclusively inside.
120///
121/// \param [out] locals - the list of loop-local values detected for \p doLoop.
122void collectLoopLocalValues(fir::DoConcurrentLoopOp loop,
123 llvm::SetVector<mlir::Value> &locals) {
124 loop.walk([&](mlir::Operation *op) {
125 for (mlir::Value operand : op->getOperands()) {
126 if (locals.contains(operand))
127 continue;
128
129 bool isLocal = true;
130
131 if (!mlir::isa_and_present<fir::AllocaOp>(operand.getDefiningOp()))
132 continue;
133
134 // Values defined inside the loop are not interesting since they do not
135 // need to be localized.
136 if (loop->isAncestor(operand.getDefiningOp()))
137 continue;
138
139 for (auto *user : operand.getUsers()) {
140 if (!loop->isAncestor(user)) {
141 isLocal = false;
142 break;
143 }
144 }
145
146 if (isLocal)
147 locals.insert(operand);
148 }
149 });
150}
151
152/// For a "loop-local" value \p local within a loop's scope, localizes that
153/// value within the scope of the parallel region the loop maps to. Towards that
154/// end, this function moves the allocation of \p local within \p allocRegion.
155///
156/// \param local - the value used exclusively within a loop's scope (see
157/// collectLoopLocalValues).
158///
159/// \param allocRegion - the parallel region where \p local's allocation will be
160/// privatized.
161///
162/// \param rewriter - builder used for updating \p allocRegion.
163static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion,
164 mlir::ConversionPatternRewriter &rewriter) {
165 rewriter.moveOpBefore(local.getDefiningOp(), &allocRegion.front().front());
166}
167} // namespace looputils
168
169class DoConcurrentConversion
170 : public mlir::OpConversionPattern<fir::DoConcurrentOp> {
171public:
172 using mlir::OpConversionPattern<fir::DoConcurrentOp>::OpConversionPattern;
173
174 DoConcurrentConversion(
175 mlir::MLIRContext *context, bool mapToDevice,
176 llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip)
177 : OpConversionPattern(context), mapToDevice(mapToDevice),
178 concurrentLoopsToSkip(concurrentLoopsToSkip) {}
179
180 mlir::LogicalResult
181 matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor,
182 mlir::ConversionPatternRewriter &rewriter) const override {
183 if (mapToDevice)
184 return doLoop.emitError(
185 "not yet implemented: Mapping `do concurrent` loops to device");
186
187 looputils::InductionVariableInfos ivInfos;
188 auto loop = mlir::cast<fir::DoConcurrentLoopOp>(
189 doLoop.getRegion().back().getTerminator());
190
191 auto indVars = loop.getLoopInductionVars();
192 assert(indVars.has_value());
193
194 for (mlir::Value indVar : *indVars)
195 ivInfos.emplace_back(loop, indVar);
196
197 llvm::SetVector<mlir::Value> locals;
198 looputils::collectLoopLocalValues(loop, locals);
199
200 mlir::IRMapping mapper;
201 mlir::omp::ParallelOp parallelOp =
202 genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper);
203 mlir::omp::LoopNestOperands loopNestClauseOps;
204 genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
205 loopNestClauseOps);
206
207 for (mlir::Value local : locals)
208 looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
209 rewriter);
210
211 mlir::omp::LoopNestOp ompLoopNest =
212 genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
213 /*isComposite=*/mapToDevice);
214
215 rewriter.setInsertionPoint(doLoop);
216 fir::FirOpBuilder builder(
217 rewriter,
218 fir::getKindMapping(doLoop->getParentOfType<mlir::ModuleOp>()));
219
220 // Collect iteration variable(s) allocations so that we can move them
221 // outside the `fir.do_concurrent` wrapper (before erasing it).
222 llvm::SmallVector<mlir::Operation *> opsToMove;
223 for (mlir::Operation &op : llvm::drop_end(doLoop))
224 opsToMove.push_back(&op);
225
226 mlir::Block *allocBlock = builder.getAllocaBlock();
227
228 for (mlir::Operation *op : llvm::reverse(opsToMove)) {
229 rewriter.moveOpBefore(op, allocBlock, allocBlock->begin());
230 }
231
232 // Mark `unordered` loops that are not perfectly nested to be skipped from
233 // the legality check of the `ConversionTarget` since we are not interested
234 // in mapping them to OpenMP.
235 ompLoopNest->walk([&](fir::DoConcurrentOp doLoop) {
236 concurrentLoopsToSkip.insert(doLoop);
237 });
238
239 rewriter.eraseOp(doLoop);
240
241 return mlir::success();
242 }
243
244private:
245 mlir::omp::ParallelOp
246 genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
247 looputils::InductionVariableInfos &ivInfos,
248 mlir::IRMapping &mapper) const {
249 auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loc);
250 rewriter.createBlock(&parallelOp.getRegion());
251 rewriter.setInsertionPoint(rewriter.create<mlir::omp::TerminatorOp>(loc));
252
253 genLoopNestIndVarAllocs(rewriter, ivInfos, mapper);
254 return parallelOp;
255 }
256
257 void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter,
258 looputils::InductionVariableInfos &ivInfos,
259 mlir::IRMapping &mapper) const {
260
261 for (auto &indVarInfo : ivInfos)
262 genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper);
263 }
264
265 mlir::Operation *
266 genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter,
267 mlir::Operation *indVarMemDef,
268 mlir::IRMapping &mapper) const {
269 assert(
270 indVarMemDef != nullptr &&
271 "Induction variable memdef is expected to have a defining operation.");
272
273 llvm::SmallSetVector<mlir::Operation *, 2> indVarDeclareAndAlloc;
274 for (auto operand : indVarMemDef->getOperands())
275 indVarDeclareAndAlloc.insert(operand.getDefiningOp());
276 indVarDeclareAndAlloc.insert(indVarMemDef);
277
278 mlir::Operation *result;
279 for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
280 result = rewriter.clone(*opToClone, mapper);
281
282 return result;
283 }
284
285 void
286 genLoopNestClauseOps(mlir::Location loc,
287 mlir::ConversionPatternRewriter &rewriter,
288 fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
289 mlir::omp::LoopNestOperands &loopNestClauseOps) const {
290 assert(loopNestClauseOps.loopLowerBounds.empty() &&
291 "Loop nest bounds were already emitted!");
292
293 auto populateBounds = [](mlir::Value var,
294 llvm::SmallVectorImpl<mlir::Value> &bounds) {
295 bounds.push_back(var.getDefiningOp()->getResult(0));
296 };
297
298 for (auto [lb, ub, st] : llvm::zip_equal(
299 loop.getLowerBound(), loop.getUpperBound(), loop.getStep())) {
300 populateBounds(lb, loopNestClauseOps.loopLowerBounds);
301 populateBounds(ub, loopNestClauseOps.loopUpperBounds);
302 populateBounds(st, loopNestClauseOps.loopSteps);
303 }
304
305 loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
306 }
307
308 mlir::omp::LoopNestOp
309 genWsLoopOp(mlir::ConversionPatternRewriter &rewriter,
310 fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
311 const mlir::omp::LoopNestOperands &clauseOps,
312 bool isComposite) const {
313 mlir::omp::WsloopOperands wsloopClauseOps;
314
315 // For `local` (and `local_init`) opernads, emit corresponding `private`
316 // clauses and attach these clauses to the workshare loop.
317 if (!loop.getLocalOperands().empty())
318 for (auto [op, sym, arg] : llvm::zip_equal(
319 loop.getLocalOperands(),
320 loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
321 loop.getRegionLocalArgs())) {
322 auto localizer = mlir::SymbolTable::lookupNearestSymbolFrom<
323 fir::LocalitySpecifierOp>(loop, sym);
324 if (localizer.getLocalitySpecifierType() ==
325 fir::LocalitySpecifierType::LocalInit)
326 TODO(localizer.getLoc(),
327 "local_init conversion is not supported yet");
328
329 auto oldIP = rewriter.saveInsertionPoint();
330 rewriter.setInsertionPointAfter(localizer);
331 auto privatizer = rewriter.create<mlir::omp::PrivateClauseOp>(
332 localizer.getLoc(), sym.getLeafReference().str() + ".omp",
333 localizer.getTypeAttr().getValue(),
334 mlir::omp::DataSharingClauseType::Private);
335
336 if (!localizer.getInitRegion().empty()) {
337 rewriter.cloneRegionBefore(localizer.getInitRegion(),
338 privatizer.getInitRegion(),
339 privatizer.getInitRegion().begin());
340 auto firYield = mlir::cast<fir::YieldOp>(
341 privatizer.getInitRegion().back().getTerminator());
342 rewriter.setInsertionPoint(firYield);
343 rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(),
344 firYield.getOperands());
345 rewriter.eraseOp(firYield);
346 }
347
348 if (!localizer.getDeallocRegion().empty()) {
349 rewriter.cloneRegionBefore(localizer.getDeallocRegion(),
350 privatizer.getDeallocRegion(),
351 privatizer.getDeallocRegion().begin());
352 auto firYield = mlir::cast<fir::YieldOp>(
353 privatizer.getDeallocRegion().back().getTerminator());
354 rewriter.setInsertionPoint(firYield);
355 rewriter.create<mlir::omp::YieldOp>(firYield.getLoc(),
356 firYield.getOperands());
357 rewriter.eraseOp(firYield);
358 }
359
360 rewriter.restoreInsertionPoint(oldIP);
361
362 wsloopClauseOps.privateVars.push_back(op);
363 wsloopClauseOps.privateSyms.push_back(
364 mlir::SymbolRefAttr::get(privatizer));
365 }
366
367 auto wsloopOp =
368 rewriter.create<mlir::omp::WsloopOp>(loop.getLoc(), wsloopClauseOps);
369 wsloopOp.setComposite(isComposite);
370
371 Fortran::common::openmp::EntryBlockArgs wsloopArgs;
372 wsloopArgs.priv.vars = wsloopClauseOps.privateVars;
373 Fortran::common::openmp::genEntryBlock(rewriter, wsloopArgs,
374 wsloopOp.getRegion());
375
376 auto loopNestOp =
377 rewriter.create<mlir::omp::LoopNestOp>(loop.getLoc(), clauseOps);
378
379 // Clone the loop's body inside the loop nest construct using the
380 // mapped values.
381 rewriter.cloneRegionBefore(loop.getRegion(), loopNestOp.getRegion(),
382 loopNestOp.getRegion().begin(), mapper);
383
384 rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back());
385 rewriter.create<mlir::omp::YieldOp>(loop->getLoc());
386
387 // `local` region arguments are transferred/cloned from the `do concurrent`
388 // loop to the loopnest op when the region is cloned above. Instead, these
389 // region arguments should be on the workshare loop's region.
390 for (auto [wsloopArg, loopNestArg] :
391 llvm::zip_equal(wsloopOp.getRegion().getArguments(),
392 loopNestOp.getRegion().getArguments().drop_front(
393 clauseOps.loopLowerBounds.size())))
394 rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
395
396 for (unsigned i = 0; i < loop.getLocalVars().size(); ++i)
397 loopNestOp.getRegion().eraseArgument(clauseOps.loopLowerBounds.size());
398
399 return loopNestOp;
400 }
401
402 bool mapToDevice;
403 llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
404};
405
406class DoConcurrentConversionPass
407 : public flangomp::impl::DoConcurrentConversionPassBase<
408 DoConcurrentConversionPass> {
409public:
410 DoConcurrentConversionPass() = default;
411
412 DoConcurrentConversionPass(
413 const flangomp::DoConcurrentConversionPassOptions &options)
414 : DoConcurrentConversionPassBase(options) {}
415
416 void runOnOperation() override {
417 mlir::func::FuncOp func = getOperation();
418
419 if (func.isDeclaration())
420 return;
421
422 mlir::MLIRContext *context = &getContext();
423
424 if (mapTo != flangomp::DoConcurrentMappingKind::DCMK_Host &&
425 mapTo != flangomp::DoConcurrentMappingKind::DCMK_Device) {
426 mlir::emitWarning(mlir::UnknownLoc::get(context),
427 "DoConcurrentConversionPass: invalid `map-to` value. "
428 "Valid values are: `host` or `device`");
429 return;
430 }
431
432 llvm::DenseSet<fir::DoConcurrentOp> concurrentLoopsToSkip;
433 mlir::RewritePatternSet patterns(context);
434 patterns.insert<DoConcurrentConversion>(
435 context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device,
436 concurrentLoopsToSkip);
437 mlir::ConversionTarget target(*context);
438 target.addDynamicallyLegalOp<fir::DoConcurrentOp>(
439 [&](fir::DoConcurrentOp op) {
440 return concurrentLoopsToSkip.contains(op);
441 });
442 target.markUnknownOpDynamicallyLegal(
443 [](mlir::Operation *) { return true; });
444
445 if (mlir::failed(mlir::applyFullConversion(getOperation(), target,
446 std::move(patterns)))) {
447 signalPassFailure();
448 }
449 }
450};
451} // namespace
452
453std::unique_ptr<mlir::Pass>
454flangomp::createDoConcurrentConversionPass(bool mapToDevice) {
455 DoConcurrentConversionPassOptions options;
456 options.mapTo = mapToDevice ? flangomp::DoConcurrentMappingKind::DCMK_Device
457 : flangomp::DoConcurrentMappingKind::DCMK_Host;
458
459 return std::make_unique<DoConcurrentConversionPass>(options);
460}
461

// Source: flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp