1 | //===-- OpenMP.cpp -- Open MP directive lowering --------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "flang/Lower/OpenMP.h" |
14 | |
15 | #include "ClauseProcessor.h" |
16 | #include "Clauses.h" |
17 | #include "DataSharingProcessor.h" |
18 | #include "DirectivesCommon.h" |
19 | #include "ReductionProcessor.h" |
20 | #include "Utils.h" |
21 | #include "flang/Common/idioms.h" |
22 | #include "flang/Lower/Bridge.h" |
23 | #include "flang/Lower/ConvertExpr.h" |
24 | #include "flang/Lower/ConvertVariable.h" |
25 | #include "flang/Lower/StatementContext.h" |
26 | #include "flang/Lower/SymbolMap.h" |
27 | #include "flang/Optimizer/Builder/BoxValue.h" |
28 | #include "flang/Optimizer/Builder/FIRBuilder.h" |
29 | #include "flang/Optimizer/Builder/Todo.h" |
30 | #include "flang/Optimizer/Dialect/FIRType.h" |
31 | #include "flang/Optimizer/HLFIR/HLFIROps.h" |
32 | #include "flang/Parser/parse-tree.h" |
33 | #include "flang/Semantics/openmp-directive-sets.h" |
34 | #include "flang/Semantics/tools.h" |
35 | #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" |
36 | #include "mlir/Dialect/OpenMP/OpenMPDialect.h" |
37 | #include "mlir/Transforms/RegionUtils.h" |
38 | #include "llvm/ADT/STLExtras.h" |
39 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
40 | |
41 | using namespace Fortran::lower::omp; |
42 | |
43 | //===----------------------------------------------------------------------===// |
44 | // Code generation helper functions |
45 | //===----------------------------------------------------------------------===// |
46 | |
47 | static Fortran::lower::pft::Evaluation * |
48 | getCollapsedLoopEval(Fortran::lower::pft::Evaluation &eval, int collapseValue) { |
49 | // Return the Evaluation of the innermost collapsed loop, or the current one |
50 | // if there was no COLLAPSE. |
51 | if (collapseValue == 0) |
52 | return &eval; |
53 | |
54 | Fortran::lower::pft::Evaluation *curEval = &eval.getFirstNestedEvaluation(); |
55 | for (int i = 1; i < collapseValue; i++) { |
56 | // The nested evaluations should be DoConstructs (i.e. they should form |
57 | // a loop nest). Each DoConstruct is a tuple <NonLabelDoStmt, Block, |
58 | // EndDoStmt>. |
59 | assert(curEval->isA<Fortran::parser::DoConstruct>()); |
60 | curEval = &*std::next(curEval->getNestedEvaluations().begin()); |
61 | } |
62 | return curEval; |
63 | } |
64 | |
65 | static void genNestedEvaluations(Fortran::lower::AbstractConverter &converter, |
66 | Fortran::lower::pft::Evaluation &eval, |
67 | int collapseValue = 0) { |
68 | Fortran::lower::pft::Evaluation *curEval = |
69 | getCollapsedLoopEval(eval, collapseValue); |
70 | |
71 | for (Fortran::lower::pft::Evaluation &e : curEval->getNestedEvaluations()) |
72 | converter.genEval(e); |
73 | } |
74 | |
75 | static fir::GlobalOp globalInitialization( |
76 | Fortran::lower::AbstractConverter &converter, |
77 | fir::FirOpBuilder &firOpBuilder, const Fortran::semantics::Symbol &sym, |
78 | const Fortran::lower::pft::Variable &var, mlir::Location currentLocation) { |
79 | mlir::Type ty = converter.genType(sym); |
80 | std::string globalName = converter.mangleName(sym); |
81 | mlir::StringAttr linkage = firOpBuilder.createInternalLinkage(); |
82 | fir::GlobalOp global = |
83 | firOpBuilder.createGlobal(currentLocation, ty, globalName, linkage); |
84 | |
85 | // Create default initialization for non-character scalar. |
86 | if (Fortran::semantics::IsAllocatableOrObjectPointer(&sym)) { |
87 | mlir::Type baseAddrType = ty.dyn_cast<fir::BoxType>().getEleTy(); |
88 | Fortran::lower::createGlobalInitialization( |
89 | firOpBuilder, global, [&](fir::FirOpBuilder &b) { |
90 | mlir::Value nullAddr = |
91 | b.createNullConstant(currentLocation, baseAddrType); |
92 | mlir::Value box = |
93 | b.create<fir::EmboxOp>(currentLocation, ty, nullAddr); |
94 | b.create<fir::HasValueOp>(currentLocation, box); |
95 | }); |
96 | } else { |
97 | Fortran::lower::createGlobalInitialization( |
98 | firOpBuilder, global, [&](fir::FirOpBuilder &b) { |
99 | mlir::Value undef = b.create<fir::UndefOp>(currentLocation, ty); |
100 | b.create<fir::HasValueOp>(currentLocation, undef); |
101 | }); |
102 | } |
103 | |
104 | return global; |
105 | } |
106 | |
107 | // Get the extended value for \p val by extracting additional variable |
108 | // information from \p base. |
109 | static fir::ExtendedValue getExtendedValue(fir::ExtendedValue base, |
110 | mlir::Value val) { |
111 | return base.match( |
112 | [&](const fir::MutableBoxValue &box) -> fir::ExtendedValue { |
113 | return fir::MutableBoxValue(val, box.nonDeferredLenParams(), {}); |
114 | }, |
115 | [&](const auto &) -> fir::ExtendedValue { |
116 | return fir::substBase(base, val); |
117 | }); |
118 | } |
119 | |
120 | #ifndef NDEBUG |
121 | static bool isThreadPrivate(Fortran::lower::SymbolRef sym) { |
122 | if (const auto *details = |
123 | sym->detailsIf<Fortran::semantics::CommonBlockDetails>()) { |
124 | for (const auto &obj : details->objects()) |
125 | if (!obj->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate)) |
126 | return false; |
127 | return true; |
128 | } |
129 | return sym->test(Fortran::semantics::Symbol::Flag::OmpThreadprivate); |
130 | } |
131 | #endif |
132 | |
133 | static void threadPrivatizeVars(Fortran::lower::AbstractConverter &converter, |
134 | Fortran::lower::pft::Evaluation &eval) { |
135 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
136 | mlir::Location currentLocation = converter.getCurrentLocation(); |
137 | mlir::OpBuilder::InsertionGuard guard(firOpBuilder); |
138 | firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); |
139 | |
140 | // If the symbol corresponds to the original ThreadprivateOp, use the symbol |
141 | // value from that operation to create one ThreadprivateOp copy operation |
142 | // inside the parallel region. |
143 | // In some cases, however, the symbol will correspond to the original, |
144 | // non-threadprivate variable. This can happen, for instance, with a common |
145 | // block, declared in a separate module, used by a parent procedure and |
146 | // privatized in its child procedure. |
147 | auto genThreadprivateOp = [&](Fortran::lower::SymbolRef sym) -> mlir::Value { |
148 | assert(isThreadPrivate(sym)); |
149 | mlir::Value symValue = converter.getSymbolAddress(sym); |
150 | mlir::Operation *op = symValue.getDefiningOp(); |
151 | if (auto declOp = mlir::dyn_cast<hlfir::DeclareOp>(op)) |
152 | op = declOp.getMemref().getDefiningOp(); |
153 | if (mlir::isa<mlir::omp::ThreadprivateOp>(op)) |
154 | symValue = mlir::dyn_cast<mlir::omp::ThreadprivateOp>(op).getSymAddr(); |
155 | return firOpBuilder.create<mlir::omp::ThreadprivateOp>( |
156 | currentLocation, symValue.getType(), symValue); |
157 | }; |
158 | |
159 | llvm::SetVector<const Fortran::semantics::Symbol *> threadprivateSyms; |
160 | converter.collectSymbolSet(eval, threadprivateSyms, |
161 | Fortran::semantics::Symbol::Flag::OmpThreadprivate, |
162 | /*collectSymbols=*/true, |
163 | /*collectHostAssociatedSymbols=*/true); |
164 | std::set<Fortran::semantics::SourceName> threadprivateSymNames; |
165 | |
166 | // For a COMMON block, the ThreadprivateOp is generated for itself instead of |
167 | // its members, so only bind the value of the new copied ThreadprivateOp |
168 | // inside the parallel region to the common block symbol only once for |
169 | // multiple members in one COMMON block. |
170 | llvm::SetVector<const Fortran::semantics::Symbol *> commonSyms; |
171 | for (std::size_t i = 0; i < threadprivateSyms.size(); i++) { |
172 | const Fortran::semantics::Symbol *sym = threadprivateSyms[i]; |
173 | mlir::Value symThreadprivateValue; |
174 | // The variable may be used more than once, and each reference has one |
175 | // symbol with the same name. Only do once for references of one variable. |
176 | if (threadprivateSymNames.find(sym->name()) != threadprivateSymNames.end()) |
177 | continue; |
178 | threadprivateSymNames.insert(sym->name()); |
179 | if (const Fortran::semantics::Symbol *common = |
180 | Fortran::semantics::FindCommonBlockContaining(sym->GetUltimate())) { |
181 | mlir::Value commonThreadprivateValue; |
182 | if (commonSyms.contains(common)) { |
183 | commonThreadprivateValue = converter.getSymbolAddress(*common); |
184 | } else { |
185 | commonThreadprivateValue = genThreadprivateOp(*common); |
186 | converter.bindSymbol(*common, commonThreadprivateValue); |
187 | commonSyms.insert(common); |
188 | } |
189 | symThreadprivateValue = Fortran::lower::genCommonBlockMember( |
190 | converter, currentLocation, *sym, commonThreadprivateValue); |
191 | } else { |
192 | symThreadprivateValue = genThreadprivateOp(*sym); |
193 | } |
194 | |
195 | fir::ExtendedValue sexv = converter.getSymbolExtendedValue(*sym); |
196 | fir::ExtendedValue symThreadprivateExv = |
197 | getExtendedValue(sexv, symThreadprivateValue); |
198 | converter.bindSymbol(*sym, symThreadprivateExv); |
199 | } |
200 | } |
201 | |
202 | static mlir::Operation * |
203 | createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, |
204 | mlir::Location loc, mlir::Value indexVal, |
205 | const Fortran::semantics::Symbol *sym) { |
206 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
207 | mlir::OpBuilder::InsertPoint insPt = firOpBuilder.saveInsertionPoint(); |
208 | firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); |
209 | |
210 | mlir::Type tempTy = converter.genType(*sym); |
211 | mlir::Value temp = firOpBuilder.create<fir::AllocaOp>( |
212 | loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, |
213 | /*shapeParams*/ mlir::ValueRange{}, |
214 | llvm::ArrayRef<mlir::NamedAttribute>{ |
215 | fir::getAdaptToByRefAttr(firOpBuilder)}); |
216 | converter.bindSymbol(*sym, temp); |
217 | firOpBuilder.restoreInsertionPoint(insPt); |
218 | mlir::Value cvtVal = firOpBuilder.createConvert(loc, tempTy, indexVal); |
219 | mlir::Operation *storeOp = firOpBuilder.create<fir::StoreOp>( |
220 | loc, cvtVal, converter.getSymbolAddress(*sym)); |
221 | return storeOp; |
222 | } |
223 | |
224 | // This helper function implements the functionality of "promoting" |
225 | // non-CPTR arguments of use_device_ptr to use_device_addr |
226 | // arguments (automagic conversion of use_device_ptr -> |
227 | // use_device_addr in these cases). The way we do so currently is |
228 | // through the shuffling of operands from the devicePtrOperands to |
229 | // deviceAddrOperands where neccesary and re-organizing the types, |
230 | // locations and symbols to maintain the correct ordering of ptr/addr |
231 | // input -> BlockArg. |
232 | // |
233 | // This effectively implements some deprecated OpenMP functionality |
234 | // that some legacy applications unfortunately depend on |
235 | // (deprecated in specification version 5.2): |
236 | // |
237 | // "If a list item in a use_device_ptr clause is not of type C_PTR, |
238 | // the behavior is as if the list item appeared in a use_device_addr |
239 | // clause. Support for such list items in a use_device_ptr clause |
240 | // is deprecated." |
241 | static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( |
242 | mlir::omp::UseDeviceClauseOps &clauseOps, |
243 | llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes, |
244 | llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs, |
245 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> |
246 | &useDeviceSymbols) { |
247 | auto moveElementToBack = [](size_t idx, auto &vector) { |
248 | auto *iter = std::next(vector.begin(), idx); |
249 | vector.push_back(*iter); |
250 | vector.erase(iter); |
251 | }; |
252 | |
253 | // Iterate over our use_device_ptr list and shift all non-cptr arguments into |
254 | // use_device_addr. |
255 | for (auto *it = clauseOps.useDevicePtrVars.begin(); |
256 | it != clauseOps.useDevicePtrVars.end();) { |
257 | if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) { |
258 | clauseOps.useDeviceAddrVars.push_back(*it); |
259 | // We have to shuffle the symbols around as well, to maintain |
260 | // the correct Input -> BlockArg for use_device_ptr/use_device_addr. |
261 | // NOTE: However, as map's do not seem to be included currently |
262 | // this isn't as pertinent, but we must try to maintain for |
263 | // future alterations. I believe the reason they are not currently |
264 | // is that the BlockArg assign/lowering needs to be extended |
265 | // to a greater set of types. |
266 | auto idx = std::distance(clauseOps.useDevicePtrVars.begin(), it); |
267 | moveElementToBack(idx, useDeviceTypes); |
268 | moveElementToBack(idx, useDeviceLocs); |
269 | moveElementToBack(idx, useDeviceSymbols); |
270 | it = clauseOps.useDevicePtrVars.erase(it); |
271 | continue; |
272 | } |
273 | ++it; |
274 | } |
275 | } |
276 | |
277 | /// Extract the list of function and variable symbols affected by the given |
278 | /// 'declare target' directive and return the intended device type for them. |
279 | static void getDeclareTargetInfo( |
280 | Fortran::lower::AbstractConverter &converter, |
281 | Fortran::semantics::SemanticsContext &semaCtx, |
282 | Fortran::lower::pft::Evaluation &eval, |
283 | const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, |
284 | mlir::omp::DeclareTargetClauseOps &clauseOps, |
285 | llvm::SmallVectorImpl<DeclareTargetCapturePair> &symbolAndClause) { |
286 | const auto &spec = std::get<Fortran::parser::OmpDeclareTargetSpecifier>( |
287 | declareTargetConstruct.t); |
288 | if (const auto *objectList{ |
289 | Fortran::parser::Unwrap<Fortran::parser::OmpObjectList>(spec.u)}) { |
290 | ObjectList objects{makeObjects(*objectList, semaCtx)}; |
291 | // Case: declare target(func, var1, var2) |
292 | gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, |
293 | symbolAndClause); |
294 | } else if (const auto *clauseList{ |
295 | Fortran::parser::Unwrap<Fortran::parser::OmpClauseList>( |
296 | spec.u)}) { |
297 | List<Clause> clauses = makeClauses(*clauseList, semaCtx); |
298 | if (clauses.empty()) { |
299 | // Case: declare target, implicit capture of function |
300 | symbolAndClause.emplace_back( |
301 | mlir::omp::DeclareTargetCaptureClause::to, |
302 | eval.getOwningProcedure()->getSubprogramSymbol()); |
303 | } |
304 | |
305 | ClauseProcessor cp(converter, semaCtx, clauses); |
306 | cp.processDeviceType(clauseOps); |
307 | cp.processEnter(symbolAndClause); |
308 | cp.processLink(symbolAndClause); |
309 | cp.processTo(symbolAndClause); |
310 | |
311 | cp.processTODO<clause::Indirect>(converter.getCurrentLocation(), |
312 | llvm::omp::Directive::OMPD_declare_target); |
313 | } |
314 | } |
315 | |
316 | static void collectDeferredDeclareTargets( |
317 | Fortran::lower::AbstractConverter &converter, |
318 | Fortran::semantics::SemanticsContext &semaCtx, |
319 | Fortran::lower::pft::Evaluation &eval, |
320 | const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, |
321 | llvm::SmallVectorImpl<Fortran::lower::OMPDeferredDeclareTargetInfo> |
322 | &deferredDeclareTarget) { |
323 | mlir::omp::DeclareTargetClauseOps clauseOps; |
324 | llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; |
325 | getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, |
326 | clauseOps, symbolAndClause); |
327 | // Return the device type only if at least one of the targets for the |
328 | // directive is a function or subroutine |
329 | mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); |
330 | |
331 | for (const DeclareTargetCapturePair &symClause : symbolAndClause) { |
332 | mlir::Operation *op = mod.lookupSymbol(converter.mangleName( |
333 | std::get<const Fortran::semantics::Symbol &>(symClause))); |
334 | |
335 | if (!op) { |
336 | deferredDeclareTarget.push_back({std::get<0>(symClause), |
337 | clauseOps.deviceType, |
338 | std::get<1>(symClause)}); |
339 | } |
340 | } |
341 | } |
342 | |
343 | static std::optional<mlir::omp::DeclareTargetDeviceType> |
344 | getDeclareTargetFunctionDevice( |
345 | Fortran::lower::AbstractConverter &converter, |
346 | Fortran::semantics::SemanticsContext &semaCtx, |
347 | Fortran::lower::pft::Evaluation &eval, |
348 | const Fortran::parser::OpenMPDeclareTargetConstruct |
349 | &declareTargetConstruct) { |
350 | mlir::omp::DeclareTargetClauseOps clauseOps; |
351 | llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; |
352 | getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, |
353 | clauseOps, symbolAndClause); |
354 | |
355 | // Return the device type only if at least one of the targets for the |
356 | // directive is a function or subroutine |
357 | mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); |
358 | for (const DeclareTargetCapturePair &symClause : symbolAndClause) { |
359 | mlir::Operation *op = mod.lookupSymbol(converter.mangleName( |
360 | std::get<const Fortran::semantics::Symbol &>(symClause))); |
361 | |
362 | if (mlir::isa_and_nonnull<mlir::func::FuncOp>(op)) |
363 | return clauseOps.deviceType; |
364 | } |
365 | |
366 | return std::nullopt; |
367 | } |
368 | |
369 | /// Set up the entry block of the given `omp.loop_nest` operation, adding a |
370 | /// block argument for each loop induction variable and allocating and |
371 | /// initializing a private value to hold each of them. |
372 | /// |
373 | /// This function can also bind the symbols of any variables that should match |
374 | /// block arguments on parent loop wrapper operations attached to the same |
375 | /// loop. This allows the introduction of any necessary `hlfir.declare` |
376 | /// operations inside of the entry block of the `omp.loop_nest` operation and |
377 | /// not directly under any of the wrappers, which would invalidate them. |
378 | /// |
379 | /// \param [in] op - the loop nest operation. |
380 | /// \param [in] converter - PFT to MLIR conversion interface. |
381 | /// \param [in] loc - location. |
382 | /// \param [in] args - symbols of induction variables. |
383 | /// \param [in] wrapperSyms - symbols of variables to be mapped to loop wrapper |
384 | /// entry block arguments. |
385 | /// \param [in] wrapperArgs - entry block arguments of parent loop wrappers. |
386 | static void |
387 | genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, |
388 | mlir::Location &loc, |
389 | llvm::ArrayRef<const Fortran::semantics::Symbol *> args, |
390 | llvm::ArrayRef<const Fortran::semantics::Symbol *> wrapperSyms = {}, |
391 | llvm::ArrayRef<mlir::BlockArgument> wrapperArgs = {}) { |
392 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
393 | auto ®ion = op->getRegion(0); |
394 | |
395 | std::size_t loopVarTypeSize = 0; |
396 | for (const Fortran::semantics::Symbol *arg : args) |
397 | loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); |
398 | mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); |
399 | llvm::SmallVector<mlir::Type> tiv(args.size(), loopVarType); |
400 | llvm::SmallVector<mlir::Location> locs(args.size(), loc); |
401 | firOpBuilder.createBlock(®ion, {}, tiv, locs); |
402 | |
403 | // Bind the entry block arguments of parent wrappers to the corresponding |
404 | // symbols. |
405 | for (auto [arg, prv] : llvm::zip_equal(wrapperSyms, wrapperArgs)) |
406 | converter.bindSymbol(*arg, prv); |
407 | |
408 | // The argument is not currently in memory, so make a temporary for the |
409 | // argument, and store it there, then bind that location to the argument. |
410 | mlir::Operation *storeOp = nullptr; |
411 | for (auto [argIndex, argSymbol] : llvm::enumerate(First&: args)) { |
412 | mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); |
413 | storeOp = |
414 | createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); |
415 | } |
416 | firOpBuilder.setInsertionPointAfter(storeOp); |
417 | } |
418 | |
419 | static void genReductionVars( |
420 | mlir::Operation *op, Fortran::lower::AbstractConverter &converter, |
421 | mlir::Location &loc, |
422 | llvm::ArrayRef<const Fortran::semantics::Symbol *> reductionArgs, |
423 | llvm::ArrayRef<mlir::Type> reductionTypes) { |
424 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
425 | llvm::SmallVector<mlir::Location> blockArgLocs(reductionArgs.size(), loc); |
426 | |
427 | mlir::Block *entryBlock = firOpBuilder.createBlock( |
428 | &op->getRegion(0), {}, reductionTypes, blockArgLocs); |
429 | |
430 | // Bind the reduction arguments to their block arguments. |
431 | for (auto [arg, prv] : |
432 | llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { |
433 | converter.bindSymbol(*arg, prv); |
434 | } |
435 | } |
436 | |
437 | static void |
438 | markDeclareTarget(mlir::Operation *op, |
439 | Fortran::lower::AbstractConverter &converter, |
440 | mlir::omp::DeclareTargetCaptureClause captureClause, |
441 | mlir::omp::DeclareTargetDeviceType deviceType) { |
442 | // TODO: Add support for program local variables with declare target applied |
443 | auto declareTargetOp = llvm::dyn_cast<mlir::omp::DeclareTargetInterface>(op); |
444 | if (!declareTargetOp) |
445 | fir::emitFatalError( |
446 | converter.getCurrentLocation(), |
447 | "Attempt to apply declare target on unsupported operation" ); |
448 | |
449 | // The function or global already has a declare target applied to it, very |
450 | // likely through implicit capture (usage in another declare target |
451 | // function/subroutine). It should be marked as any if it has been assigned |
452 | // both host and nohost, else we skip, as there is no change |
453 | if (declareTargetOp.isDeclareTarget()) { |
454 | if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) |
455 | declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, |
456 | captureClause); |
457 | return; |
458 | } |
459 | |
460 | declareTargetOp.setDeclareTarget(deviceType, captureClause); |
461 | } |
462 | |
463 | /// Split a combined directive into an outer leaf directive and the (possibly |
464 | /// combined) rest of the combined directive. Composite directives and |
465 | /// non-compound directives are not split, in which case it will return the |
466 | /// input directive as its first output and an empty value as its second output. |
467 | static std::pair<llvm::omp::Directive, std::optional<llvm::omp::Directive>> |
468 | splitCombinedDirective(llvm::omp::Directive dir) { |
469 | using D = llvm::omp::Directive; |
470 | switch (dir) { |
471 | case D::OMPD_masked_taskloop: |
472 | return {D::OMPD_masked, D::OMPD_taskloop}; |
473 | case D::OMPD_masked_taskloop_simd: |
474 | return {D::OMPD_masked, D::OMPD_taskloop_simd}; |
475 | case D::OMPD_master_taskloop: |
476 | return {D::OMPD_master, D::OMPD_taskloop}; |
477 | case D::OMPD_master_taskloop_simd: |
478 | return {D::OMPD_master, D::OMPD_taskloop_simd}; |
479 | case D::OMPD_parallel_do: |
480 | return {D::OMPD_parallel, D::OMPD_do}; |
481 | case D::OMPD_parallel_do_simd: |
482 | return {D::OMPD_parallel, D::OMPD_do_simd}; |
483 | case D::OMPD_parallel_masked: |
484 | return {D::OMPD_parallel, D::OMPD_masked}; |
485 | case D::OMPD_parallel_masked_taskloop: |
486 | return {D::OMPD_parallel, D::OMPD_masked_taskloop}; |
487 | case D::OMPD_parallel_masked_taskloop_simd: |
488 | return {D::OMPD_parallel, D::OMPD_masked_taskloop_simd}; |
489 | case D::OMPD_parallel_master: |
490 | return {D::OMPD_parallel, D::OMPD_master}; |
491 | case D::OMPD_parallel_master_taskloop: |
492 | return {D::OMPD_parallel, D::OMPD_master_taskloop}; |
493 | case D::OMPD_parallel_master_taskloop_simd: |
494 | return {D::OMPD_parallel, D::OMPD_master_taskloop_simd}; |
495 | case D::OMPD_parallel_sections: |
496 | return {D::OMPD_parallel, D::OMPD_sections}; |
497 | case D::OMPD_parallel_workshare: |
498 | return {D::OMPD_parallel, D::OMPD_workshare}; |
499 | case D::OMPD_target_parallel: |
500 | return {D::OMPD_target, D::OMPD_parallel}; |
501 | case D::OMPD_target_parallel_do: |
502 | return {D::OMPD_target, D::OMPD_parallel_do}; |
503 | case D::OMPD_target_parallel_do_simd: |
504 | return {D::OMPD_target, D::OMPD_parallel_do_simd}; |
505 | case D::OMPD_target_simd: |
506 | return {D::OMPD_target, D::OMPD_simd}; |
507 | case D::OMPD_target_teams: |
508 | return {D::OMPD_target, D::OMPD_teams}; |
509 | case D::OMPD_target_teams_distribute: |
510 | return {D::OMPD_target, D::OMPD_teams_distribute}; |
511 | case D::OMPD_target_teams_distribute_parallel_do: |
512 | return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do}; |
513 | case D::OMPD_target_teams_distribute_parallel_do_simd: |
514 | return {D::OMPD_target, D::OMPD_teams_distribute_parallel_do_simd}; |
515 | case D::OMPD_target_teams_distribute_simd: |
516 | return {D::OMPD_target, D::OMPD_teams_distribute_simd}; |
517 | case D::OMPD_teams_distribute: |
518 | return {D::OMPD_teams, D::OMPD_distribute}; |
519 | case D::OMPD_teams_distribute_parallel_do: |
520 | return {D::OMPD_teams, D::OMPD_distribute_parallel_do}; |
521 | case D::OMPD_teams_distribute_parallel_do_simd: |
522 | return {D::OMPD_teams, D::OMPD_distribute_parallel_do_simd}; |
523 | case D::OMPD_teams_distribute_simd: |
524 | return {D::OMPD_teams, D::OMPD_distribute_simd}; |
525 | case D::OMPD_parallel_loop: |
526 | return {D::OMPD_parallel, D::OMPD_loop}; |
527 | case D::OMPD_target_parallel_loop: |
528 | return {D::OMPD_target, D::OMPD_parallel_loop}; |
529 | case D::OMPD_target_teams_loop: |
530 | return {D::OMPD_target, D::OMPD_teams_loop}; |
531 | case D::OMPD_teams_loop: |
532 | return {D::OMPD_teams, D::OMPD_loop}; |
533 | default: |
534 | return {dir, std::nullopt}; |
535 | } |
536 | } |
537 | |
538 | //===----------------------------------------------------------------------===// |
539 | // Op body generation helper structures and functions |
540 | //===----------------------------------------------------------------------===// |
541 | |
542 | struct OpWithBodyGenInfo { |
543 | /// A type for a code-gen callback function. This takes as argument the op for |
544 | /// which the code is being generated and returns the arguments of the op's |
545 | /// region. |
546 | using GenOMPRegionEntryCBFn = |
547 | std::function<llvm::SmallVector<const Fortran::semantics::Symbol *>( |
548 | mlir::Operation *)>; |
549 | |
550 | OpWithBodyGenInfo(Fortran::lower::AbstractConverter &converter, |
551 | Fortran::semantics::SemanticsContext &semaCtx, |
552 | mlir::Location loc, Fortran::lower::pft::Evaluation &eval, |
553 | llvm::omp::Directive dir) |
554 | : converter(converter), semaCtx(semaCtx), loc(loc), eval(eval), dir(dir) { |
555 | } |
556 | |
557 | OpWithBodyGenInfo &setGenNested(bool value) { |
558 | genNested = value; |
559 | return *this; |
560 | } |
561 | |
562 | OpWithBodyGenInfo &setOuterCombined(bool value) { |
563 | outerCombined = value; |
564 | return *this; |
565 | } |
566 | |
567 | OpWithBodyGenInfo &setClauses(const List<Clause> *value) { |
568 | clauses = value; |
569 | return *this; |
570 | } |
571 | |
572 | OpWithBodyGenInfo &setDataSharingProcessor(DataSharingProcessor *value) { |
573 | dsp = value; |
574 | return *this; |
575 | } |
576 | |
577 | OpWithBodyGenInfo &setReductions( |
578 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *value1, |
579 | llvm::SmallVectorImpl<mlir::Type> *value2) { |
580 | reductionSymbols = value1; |
581 | reductionTypes = value2; |
582 | return *this; |
583 | } |
584 | |
585 | OpWithBodyGenInfo &setGenRegionEntryCb(GenOMPRegionEntryCBFn value) { |
586 | genRegionEntryCB = value; |
587 | return *this; |
588 | } |
589 | |
590 | /// [inout] converter to use for the clauses. |
591 | Fortran::lower::AbstractConverter &converter; |
592 | /// [in] Semantics context |
593 | Fortran::semantics::SemanticsContext &semaCtx; |
594 | /// [in] location in source code. |
595 | mlir::Location loc; |
596 | /// [in] current PFT node/evaluation. |
597 | Fortran::lower::pft::Evaluation &eval; |
598 | /// [in] leaf directive for which to generate the op body. |
599 | llvm::omp::Directive dir; |
600 | /// [in] whether to generate FIR for nested evaluations |
601 | bool genNested = true; |
602 | /// [in] is this an outer operation - prevents privatization. |
603 | bool outerCombined = false; |
604 | /// [in] list of clauses to process. |
605 | const List<Clause> *clauses = nullptr; |
606 | /// [in] if provided, processes the construct's data-sharing attributes. |
607 | DataSharingProcessor *dsp = nullptr; |
608 | /// [in] if provided, list of reduction symbols |
609 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> *reductionSymbols = |
610 | nullptr; |
611 | /// [in] if provided, list of reduction types |
612 | llvm::SmallVectorImpl<mlir::Type> *reductionTypes = nullptr; |
613 | /// [in] if provided, emits the op's region entry. Otherwise, an emtpy block |
614 | /// is created in the region. |
615 | GenOMPRegionEntryCBFn genRegionEntryCB = nullptr; |
616 | }; |
617 | |
618 | /// Create the body (block) for an OpenMP Operation. |
619 | /// |
620 | /// \param [in] op - the operation the body belongs to. |
621 | /// \param [in] info - options controlling code-gen for the construction. |
622 | static void createBodyOfOp(mlir::Operation &op, OpWithBodyGenInfo &info) { |
623 | fir::FirOpBuilder &firOpBuilder = info.converter.getFirOpBuilder(); |
624 | |
625 | auto insertMarker = [](fir::FirOpBuilder &builder) { |
626 | mlir::Value undef = builder.create<fir::UndefOp>(builder.getUnknownLoc(), |
627 | builder.getIndexType()); |
628 | return undef.getDefiningOp(); |
629 | }; |
630 | |
631 | // If an argument for the region is provided then create the block with that |
632 | // argument. Also update the symbol's address with the mlir argument value. |
633 | // e.g. For loops the argument is the induction variable. And all further |
634 | // uses of the induction variable should use this mlir value. |
635 | auto regionArgs = |
636 | [&]() -> llvm::SmallVector<const Fortran::semantics::Symbol *> { |
637 | if (info.genRegionEntryCB != nullptr) { |
638 | return info.genRegionEntryCB(&op); |
639 | } |
640 | |
641 | firOpBuilder.createBlock(&op.getRegion(0)); |
642 | return {}; |
643 | }(); |
644 | // Mark the earliest insertion point. |
645 | mlir::Operation *marker = insertMarker(firOpBuilder); |
646 | |
647 | // If it is an unstructured region and is not the outer region of a combined |
648 | // construct, create empty blocks for all evaluations. |
649 | if (info.eval.lowerAsUnstructured() && !info.outerCombined) |
650 | Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, |
651 | mlir::omp::YieldOp>( |
652 | firOpBuilder, info.eval.getNestedEvaluations()); |
653 | |
654 | // Start with privatization, so that the lowering of the nested |
655 | // code will use the right symbols. |
656 | bool isLoop = llvm::omp::getDirectiveAssociation(info.dir) == |
657 | llvm::omp::Association::Loop; |
658 | bool privatize = info.clauses && !info.outerCombined; |
659 | |
660 | firOpBuilder.setInsertionPoint(marker); |
661 | std::optional<DataSharingProcessor> tempDsp; |
662 | if (privatize) { |
663 | if (!info.dsp) { |
664 | tempDsp.emplace(info.converter, info.semaCtx, *info.clauses, info.eval); |
665 | tempDsp->processStep1(); |
666 | } |
667 | } |
668 | |
669 | if (info.dir == llvm::omp::Directive::OMPD_parallel) { |
670 | threadPrivatizeVars(info.converter, info.eval); |
671 | if (info.clauses) { |
672 | firOpBuilder.setInsertionPoint(marker); |
673 | ClauseProcessor(info.converter, info.semaCtx, *info.clauses) |
674 | .processCopyin(); |
675 | } |
676 | } |
677 | |
678 | if (info.genNested) { |
679 | // genFIR(Evaluation&) tries to patch up unterminated blocks, causing |
680 | // a lot of complications for our approach if the terminator generation |
681 | // is delayed past this point. Insert a temporary terminator here, then |
682 | // delete it. |
683 | firOpBuilder.setInsertionPointToEnd(&op.getRegion(0).back()); |
684 | auto *temp = |
685 | Fortran::lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); |
686 | firOpBuilder.setInsertionPointAfter(marker); |
687 | genNestedEvaluations(info.converter, info.eval); |
688 | temp->erase(); |
689 | } |
690 | |
691 | // Get or create a unique exiting block from the given region, or |
692 | // return nullptr if there is no exiting block. |
693 | auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * { |
694 | // Find the blocks where the OMP terminator should go. In simple cases |
695 | // it is the single block in the operation's region. When the region |
696 | // is more complicated, especially with unstructured control flow, there |
697 | // may be multiple blocks, and some of them may have non-OMP terminators |
698 | // resulting from lowering of the code contained within the operation. |
699 | // All the remaining blocks are potential exit points from the op's region. |
700 | // |
701 | // Explicit control flow cannot exit any OpenMP region (other than via |
702 | // STOP), and that is enforced by semantic checks prior to lowering. STOP |
703 | // statements are lowered to a function call. |
704 | |
705 | // Collect unterminated blocks. |
706 | llvm::SmallVector<mlir::Block *> exits; |
707 | for (mlir::Block &b : region) { |
708 | if (b.empty() || !b.back().hasTrait<mlir::OpTrait::IsTerminator>()) |
709 | exits.push_back(&b); |
710 | } |
711 | |
712 | if (exits.empty()) |
713 | return nullptr; |
714 | // If there already is a unique exiting block, do not create another one. |
715 | // Additionally, some ops (e.g. omp.sections) require only 1 block in |
716 | // its region. |
717 | if (exits.size() == 1) |
718 | return exits[0]; |
719 | mlir::Block *exit = firOpBuilder.createBlock(®ion); |
720 | for (mlir::Block *b : exits) { |
721 | firOpBuilder.setInsertionPointToEnd(b); |
722 | firOpBuilder.create<mlir::cf::BranchOp>(info.loc, exit); |
723 | } |
724 | return exit; |
725 | }; |
726 | |
727 | if (auto *exitBlock = getUniqueExit(op.getRegion(0))) { |
728 | firOpBuilder.setInsertionPointToEnd(exitBlock); |
729 | auto *term = |
730 | Fortran::lower::genOpenMPTerminator(firOpBuilder, &op, info.loc); |
731 | // Only insert lastprivate code when there actually is an exit block. |
732 | // Such a block may not exist if the nested code produced an infinite |
733 | // loop (this may not make sense in production code, but a user could |
734 | // write that and we should handle it). |
735 | firOpBuilder.setInsertionPoint(term); |
736 | if (privatize) { |
737 | // DataSharingProcessor::processStep2() may create operations before/after |
738 | // the one passed as argument. We need to treat loop wrappers and their |
739 | // nested loop as a unit, so we need to pass the top level wrapper (if |
740 | // present). Otherwise, these operations will be inserted within a |
741 | // wrapper region. |
742 | mlir::Operation *privatizationTopLevelOp = &op; |
743 | if (auto loopNest = llvm::dyn_cast<mlir::omp::LoopNestOp>(op)) { |
744 | llvm::SmallVector<mlir::omp::LoopWrapperInterface> wrappers; |
745 | loopNest.gatherWrappers(wrappers); |
746 | if (!wrappers.empty()) |
747 | privatizationTopLevelOp = &*wrappers.back(); |
748 | } |
749 | |
750 | if (!info.dsp) { |
751 | assert(tempDsp.has_value()); |
752 | tempDsp->processStep2(privatizationTopLevelOp, isLoop); |
753 | } else { |
754 | if (isLoop && regionArgs.size() > 0) |
755 | info.dsp->setLoopIV(info.converter.getSymbolAddress(*regionArgs[0])); |
756 | info.dsp->processStep2(privatizationTopLevelOp, isLoop); |
757 | } |
758 | } |
759 | } |
760 | |
761 | firOpBuilder.setInsertionPointAfter(marker); |
762 | marker->erase(); |
763 | } |
764 | |
765 | static void genBodyOfTargetDataOp( |
766 | Fortran::lower::AbstractConverter &converter, |
767 | Fortran::semantics::SemanticsContext &semaCtx, |
768 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
769 | mlir::omp::TargetDataOp &dataOp, llvm::ArrayRef<mlir::Type> useDeviceTypes, |
770 | llvm::ArrayRef<mlir::Location> useDeviceLocs, |
771 | llvm::ArrayRef<const Fortran::semantics::Symbol *> useDeviceSymbols, |
772 | const mlir::Location ¤tLocation) { |
773 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
774 | mlir::Region ®ion = dataOp.getRegion(); |
775 | |
776 | firOpBuilder.createBlock(®ion, {}, useDeviceTypes, useDeviceLocs); |
777 | |
778 | for (auto [argIndex, argSymbol] : llvm::enumerate(First&: useDeviceSymbols)) { |
779 | const mlir::BlockArgument &arg = region.front().getArgument(argIndex); |
780 | fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol); |
781 | if (auto refType = arg.getType().dyn_cast<fir::ReferenceType>()) { |
782 | if (fir::isa_builtin_cptr_type(refType.getElementType())) { |
783 | converter.bindSymbol(*argSymbol, arg); |
784 | } else { |
785 | // Avoid capture of a reference to a structured binding. |
786 | const Fortran::semantics::Symbol *sym = argSymbol; |
787 | extVal.match( |
788 | [&](const fir::MutableBoxValue &mbv) { |
789 | converter.bindSymbol( |
790 | *sym, |
791 | fir::MutableBoxValue( |
792 | arg, fir::factory::getNonDeferredLenParams(extVal), {})); |
793 | }, |
794 | [&](const auto &) { |
795 | TODO(converter.getCurrentLocation(), |
796 | "use_device clause operand unsupported type" ); |
797 | }); |
798 | } |
799 | } else { |
800 | TODO(converter.getCurrentLocation(), |
801 | "use_device clause operand unsupported type" ); |
802 | } |
803 | } |
804 | |
805 | // Insert dummy instruction to remember the insertion position. The |
806 | // marker will be deleted by clean up passes since there are no uses. |
807 | // Remembering the position for further insertion is important since |
808 | // there are hlfir.declares inserted above while setting block arguments |
809 | // and new code from the body should be inserted after that. |
810 | mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>( |
811 | dataOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); |
812 | |
813 | // Create blocks for unstructured regions. This has to be done since |
814 | // blocks are initially allocated with the function as the parent region. |
815 | if (eval.lowerAsUnstructured()) { |
816 | Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, |
817 | mlir::omp::YieldOp>( |
818 | firOpBuilder, eval.getNestedEvaluations()); |
819 | } |
820 | |
821 | firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation); |
822 | |
823 | // Set the insertion point after the marker. |
824 | firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); |
825 | if (genNested) |
826 | genNestedEvaluations(converter, eval); |
827 | } |
828 | |
829 | // This functions creates a block for the body of the targetOp's region. It adds |
830 | // all the symbols present in mapSymbols as block arguments to this block. |
831 | static void |
832 | genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter, |
833 | Fortran::semantics::SemanticsContext &semaCtx, |
834 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
835 | mlir::omp::TargetOp &targetOp, |
836 | llvm::ArrayRef<const Fortran::semantics::Symbol *> mapSyms, |
837 | llvm::ArrayRef<mlir::Location> mapSymLocs, |
838 | llvm::ArrayRef<mlir::Type> mapSymTypes, |
839 | const mlir::Location ¤tLocation) { |
840 | assert(mapSymTypes.size() == mapSymLocs.size()); |
841 | |
842 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
843 | mlir::Region ®ion = targetOp.getRegion(); |
844 | |
845 | auto *regionBlock = |
846 | firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); |
847 | |
848 | // Clones the `bounds` placing them inside the target region and returns them. |
849 | auto cloneBound = [&](mlir::Value bound) { |
850 | if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { |
851 | mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); |
852 | regionBlock->push_back(clonedOp); |
853 | return clonedOp->getResult(0); |
854 | } |
855 | TODO(converter.getCurrentLocation(), |
856 | "target map clause operand unsupported bound type" ); |
857 | }; |
858 | |
859 | auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) { |
860 | llvm::SmallVector<mlir::Value> clonedBounds; |
861 | for (mlir::Value bound : bounds) |
862 | clonedBounds.emplace_back(cloneBound(bound)); |
863 | return clonedBounds; |
864 | }; |
865 | |
866 | // Bind the symbols to their corresponding block arguments. |
867 | for (auto [argIndex, argSymbol] : llvm::enumerate(First&: mapSyms)) { |
868 | const mlir::BlockArgument &arg = region.getArgument(argIndex); |
869 | // Avoid capture of a reference to a structured binding. |
870 | const Fortran::semantics::Symbol *sym = argSymbol; |
871 | // Structure component symbols don't have bindings. |
872 | if (sym->owner().IsDerivedType()) |
873 | continue; |
874 | fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym); |
875 | extVal.match( |
876 | [&](const fir::BoxValue &v) { |
877 | converter.bindSymbol(*sym, |
878 | fir::BoxValue(arg, cloneBounds(v.getLBounds()), |
879 | v.getExplicitParameters(), |
880 | v.getExplicitExtents())); |
881 | }, |
882 | [&](const fir::MutableBoxValue &v) { |
883 | converter.bindSymbol( |
884 | *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), |
885 | v.getMutableProperties())); |
886 | }, |
887 | [&](const fir::ArrayBoxValue &v) { |
888 | converter.bindSymbol( |
889 | *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), |
890 | cloneBounds(v.getLBounds()), |
891 | v.getSourceBox())); |
892 | }, |
893 | [&](const fir::CharArrayBoxValue &v) { |
894 | converter.bindSymbol( |
895 | *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), |
896 | cloneBounds(v.getExtents()), |
897 | cloneBounds(v.getLBounds()))); |
898 | }, |
899 | [&](const fir::CharBoxValue &v) { |
900 | converter.bindSymbol(*sym, |
901 | fir::CharBoxValue(arg, cloneBound(v.getLen()))); |
902 | }, |
903 | [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); }, |
904 | [&](const auto &) { |
905 | TODO(converter.getCurrentLocation(), |
906 | "target map clause operand unsupported type" ); |
907 | }); |
908 | } |
909 | |
910 | // Check if cloning the bounds introduced any dependency on the outer region. |
911 | // If so, then either clone them as well if they are MemoryEffectFree, or else |
912 | // copy them to a new temporary and add them to the map and block_argument |
913 | // lists and replace their uses with the new temporary. |
914 | llvm::SetVector<mlir::Value> valuesDefinedAbove; |
915 | mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); |
916 | while (!valuesDefinedAbove.empty()) { |
917 | for (mlir::Value val : valuesDefinedAbove) { |
918 | mlir::Operation *valOp = val.getDefiningOp(); |
919 | if (mlir::isMemoryEffectFree(valOp)) { |
920 | mlir::Operation *clonedOp = valOp->clone(); |
921 | regionBlock->push_front(clonedOp); |
922 | val.replaceUsesWithIf( |
923 | clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { |
924 | return use.getOwner()->getBlock() == regionBlock; |
925 | }); |
926 | } else { |
927 | auto savedIP = firOpBuilder.getInsertionPoint(); |
928 | firOpBuilder.setInsertionPointAfter(valOp); |
929 | auto copyVal = |
930 | firOpBuilder.createTemporary(val.getLoc(), val.getType()); |
931 | firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); |
932 | |
933 | llvm::SmallVector<mlir::Value> bounds; |
934 | std::stringstream name; |
935 | firOpBuilder.setInsertionPoint(targetOp); |
936 | mlir::Value mapOp = createMapInfoOp( |
937 | firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), |
938 | bounds, llvm::SmallVector<mlir::Value>{}, |
939 | static_cast< |
940 | std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( |
941 | llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), |
942 | mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); |
943 | targetOp.getMapOperandsMutable().append(mapOp); |
944 | mlir::Value clonedValArg = |
945 | region.addArgument(copyVal.getType(), copyVal.getLoc()); |
946 | firOpBuilder.setInsertionPointToStart(regionBlock); |
947 | auto loadOp = firOpBuilder.create<fir::LoadOp>(clonedValArg.getLoc(), |
948 | clonedValArg); |
949 | val.replaceUsesWithIf( |
950 | loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { |
951 | return use.getOwner()->getBlock() == regionBlock; |
952 | }); |
953 | firOpBuilder.setInsertionPoint(regionBlock, savedIP); |
954 | } |
955 | } |
956 | valuesDefinedAbove.clear(); |
957 | mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); |
958 | } |
959 | |
960 | // Insert dummy instruction to remember the insertion position. The |
961 | // marker will be deleted since there are not uses. |
962 | // In the HLFIR flow there are hlfir.declares inserted above while |
963 | // setting block arguments. |
964 | mlir::Value undefMarker = firOpBuilder.create<fir::UndefOp>( |
965 | targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); |
966 | |
967 | // Create blocks for unstructured regions. This has to be done since |
968 | // blocks are initially allocated with the function as the parent region. |
969 | if (eval.lowerAsUnstructured()) { |
970 | Fortran::lower::createEmptyRegionBlocks<mlir::omp::TerminatorOp, |
971 | mlir::omp::YieldOp>( |
972 | firOpBuilder, eval.getNestedEvaluations()); |
973 | } |
974 | |
975 | firOpBuilder.create<mlir::omp::TerminatorOp>(currentLocation); |
976 | |
977 | // Create the insertion point after the marker. |
978 | firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); |
979 | if (genNested) |
980 | genNestedEvaluations(converter, eval); |
981 | } |
982 | |
983 | template <typename OpTy, typename... Args> |
984 | static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { |
985 | auto op = info.converter.getFirOpBuilder().create<OpTy>( |
986 | info.loc, std::forward<Args>(args)...); |
987 | createBodyOfOp(*op, info); |
988 | return op; |
989 | } |
990 | |
991 | //===----------------------------------------------------------------------===// |
992 | // Code generation functions for clauses |
993 | //===----------------------------------------------------------------------===// |
994 | |
995 | static void |
996 | genCriticalDeclareClauses(Fortran::lower::AbstractConverter &converter, |
997 | Fortran::semantics::SemanticsContext &semaCtx, |
998 | const List<Clause> &clauses, mlir::Location loc, |
999 | mlir::omp::CriticalClauseOps &clauseOps, |
1000 | llvm::StringRef name) { |
1001 | ClauseProcessor cp(converter, semaCtx, clauses); |
1002 | cp.processHint(clauseOps); |
1003 | clauseOps.nameAttr = |
1004 | mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name); |
1005 | } |
1006 | |
1007 | static void genFlushClauses(Fortran::lower::AbstractConverter &converter, |
1008 | Fortran::semantics::SemanticsContext &semaCtx, |
1009 | const ObjectList &objects, |
1010 | const List<Clause> &clauses, mlir::Location loc, |
1011 | llvm::SmallVectorImpl<mlir::Value> &operandRange) { |
1012 | if (!objects.empty()) |
1013 | genObjectList(objects, converter, operandRange); |
1014 | |
1015 | if (!clauses.empty()) |
1016 | TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause" ); |
1017 | } |
1018 | |
1019 | static void genLoopNestClauses( |
1020 | Fortran::lower::AbstractConverter &converter, |
1021 | Fortran::semantics::SemanticsContext &semaCtx, |
1022 | Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses, |
1023 | mlir::Location loc, mlir::omp::LoopNestClauseOps &clauseOps, |
1024 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &iv) { |
1025 | ClauseProcessor cp(converter, semaCtx, clauses); |
1026 | cp.processCollapse(loc, eval, clauseOps, iv); |
1027 | clauseOps.loopInclusiveAttr = converter.getFirOpBuilder().getUnitAttr(); |
1028 | } |
1029 | |
1030 | static void |
1031 | genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, |
1032 | Fortran::semantics::SemanticsContext &semaCtx, |
1033 | const List<Clause> &clauses, mlir::Location loc, |
1034 | mlir::omp::OrderedRegionClauseOps &clauseOps) { |
1035 | ClauseProcessor cp(converter, semaCtx, clauses); |
1036 | cp.processTODO<clause::Simd>(loc, llvm::omp::Directive::OMPD_ordered); |
1037 | } |
1038 | |
1039 | static void genParallelClauses( |
1040 | Fortran::lower::AbstractConverter &converter, |
1041 | Fortran::semantics::SemanticsContext &semaCtx, |
1042 | Fortran::lower::StatementContext &stmtCtx, const List<Clause> &clauses, |
1043 | mlir::Location loc, bool processReduction, |
1044 | mlir::omp::ParallelClauseOps &clauseOps, |
1045 | llvm::SmallVectorImpl<mlir::Type> &reductionTypes, |
1046 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &reductionSyms) { |
1047 | ClauseProcessor cp(converter, semaCtx, clauses); |
1048 | cp.processAllocate(clauseOps); |
1049 | cp.processDefault(); |
1050 | cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); |
1051 | cp.processNumThreads(stmtCtx, clauseOps); |
1052 | cp.processProcBind(clauseOps); |
1053 | |
1054 | if (processReduction) { |
1055 | cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); |
1056 | if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) |
1057 | clauseOps.reductionByRefAttr = converter.getFirOpBuilder().getUnitAttr(); |
1058 | } |
1059 | } |
1060 | |
1061 | static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, |
1062 | Fortran::semantics::SemanticsContext &semaCtx, |
1063 | const List<Clause> &clauses, mlir::Location loc, |
1064 | mlir::omp::SectionsClauseOps &clauseOps) { |
1065 | ClauseProcessor cp(converter, semaCtx, clauses); |
1066 | cp.processAllocate(clauseOps); |
1067 | cp.processSectionsReduction(loc, clauseOps); |
1068 | cp.processNowait(clauseOps); |
1069 | // TODO Support delayed privatization. |
1070 | } |
1071 | |
1072 | static void genSimdClauses(Fortran::lower::AbstractConverter &converter, |
1073 | Fortran::semantics::SemanticsContext &semaCtx, |
1074 | const List<Clause> &clauses, mlir::Location loc, |
1075 | mlir::omp::SimdClauseOps &clauseOps) { |
1076 | ClauseProcessor cp(converter, semaCtx, clauses); |
1077 | cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); |
1078 | cp.processReduction(loc, clauseOps); |
1079 | cp.processSafelen(clauseOps); |
1080 | cp.processSimdlen(clauseOps); |
1081 | // TODO Support delayed privatization. |
1082 | |
1083 | cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear, |
1084 | clause::Nontemporal, clause::Order>( |
1085 | loc, llvm::omp::Directive::OMPD_simd); |
1086 | } |
1087 | |
1088 | static void genSingleClauses(Fortran::lower::AbstractConverter &converter, |
1089 | Fortran::semantics::SemanticsContext &semaCtx, |
1090 | const List<Clause> &clauses, mlir::Location loc, |
1091 | mlir::omp::SingleClauseOps &clauseOps) { |
1092 | ClauseProcessor cp(converter, semaCtx, clauses); |
1093 | cp.processAllocate(clauseOps); |
1094 | cp.processCopyprivate(loc, clauseOps); |
1095 | cp.processNowait(clauseOps); |
1096 | // TODO Support delayed privatization. |
1097 | } |
1098 | |
1099 | static void genTargetClauses( |
1100 | Fortran::lower::AbstractConverter &converter, |
1101 | Fortran::semantics::SemanticsContext &semaCtx, |
1102 | Fortran::lower::StatementContext &stmtCtx, const List<Clause> &clauses, |
1103 | mlir::Location loc, bool processHostOnlyClauses, bool processReduction, |
1104 | mlir::omp::TargetClauseOps &clauseOps, |
1105 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &mapSyms, |
1106 | llvm::SmallVectorImpl<mlir::Location> &mapLocs, |
1107 | llvm::SmallVectorImpl<mlir::Type> &mapTypes, |
1108 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &deviceAddrSyms, |
1109 | llvm::SmallVectorImpl<mlir::Location> &deviceAddrLocs, |
1110 | llvm::SmallVectorImpl<mlir::Type> &deviceAddrTypes, |
1111 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &devicePtrSyms, |
1112 | llvm::SmallVectorImpl<mlir::Location> &devicePtrLocs, |
1113 | llvm::SmallVectorImpl<mlir::Type> &devicePtrTypes) { |
1114 | ClauseProcessor cp(converter, semaCtx, clauses); |
1115 | cp.processDepend(clauseOps); |
1116 | cp.processDevice(stmtCtx, clauseOps); |
1117 | cp.processHasDeviceAddr(clauseOps, deviceAddrTypes, deviceAddrLocs, |
1118 | deviceAddrSyms); |
1119 | cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); |
1120 | cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, |
1121 | devicePtrSyms); |
1122 | cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes); |
1123 | cp.processThreadLimit(stmtCtx, clauseOps); |
1124 | // TODO Support delayed privatization. |
1125 | |
1126 | if (processHostOnlyClauses) |
1127 | cp.processNowait(clauseOps); |
1128 | |
1129 | cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate, |
1130 | clause::InReduction, clause::Private, clause::Reduction, |
1131 | clause::UsesAllocators>(loc, |
1132 | llvm::omp::Directive::OMPD_target); |
1133 | } |
1134 | |
1135 | static void genTargetDataClauses( |
1136 | Fortran::lower::AbstractConverter &converter, |
1137 | Fortran::semantics::SemanticsContext &semaCtx, |
1138 | Fortran::lower::StatementContext &stmtCtx, const List<Clause> &clauses, |
1139 | mlir::Location loc, mlir::omp::TargetDataClauseOps &clauseOps, |
1140 | llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes, |
1141 | llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs, |
1142 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms) { |
1143 | ClauseProcessor cp(converter, semaCtx, clauses); |
1144 | cp.processDevice(stmtCtx, clauseOps); |
1145 | cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); |
1146 | cp.processMap(loc, stmtCtx, clauseOps); |
1147 | cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs, |
1148 | useDeviceSyms); |
1149 | cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs, |
1150 | useDeviceSyms); |
1151 | |
1152 | // This function implements the deprecated functionality of use_device_ptr |
1153 | // that allows users to provide non-CPTR arguments to it with the caveat |
1154 | // that the compiler will treat them as use_device_addr. A lot of legacy |
1155 | // code may still depend on this functionality, so we should support it |
1156 | // in some manner. We do so currently by simply shifting non-cptr operands |
1157 | // from the use_device_ptr list into the front of the use_device_addr list |
1158 | // whilst maintaining the ordering of useDeviceLocs, useDeviceSyms and |
1159 | // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg |
1160 | // ordering. |
1161 | // TODO: Perhaps create a user provideable compiler option that will |
1162 | // re-introduce a hard-error rather than a warning in these cases. |
1163 | promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(clauseOps, useDeviceTypes, |
1164 | useDeviceLocs, useDeviceSyms); |
1165 | } |
1166 | |
1167 | static void genTargetEnterExitUpdateDataClauses( |
1168 | Fortran::lower::AbstractConverter &converter, |
1169 | Fortran::semantics::SemanticsContext &semaCtx, |
1170 | Fortran::lower::StatementContext &stmtCtx, const List<Clause> &clauses, |
1171 | mlir::Location loc, llvm::omp::Directive directive, |
1172 | mlir::omp::TargetEnterExitUpdateDataClauseOps &clauseOps) { |
1173 | ClauseProcessor cp(converter, semaCtx, clauses); |
1174 | cp.processDepend(clauseOps); |
1175 | cp.processDevice(stmtCtx, clauseOps); |
1176 | cp.processIf(directive, clauseOps); |
1177 | cp.processNowait(clauseOps); |
1178 | |
1179 | if (directive == llvm::omp::Directive::OMPD_target_update) { |
1180 | cp.processMotionClauses<clause::To>(stmtCtx, clauseOps); |
1181 | cp.processMotionClauses<clause::From>(stmtCtx, clauseOps); |
1182 | } else { |
1183 | cp.processMap(loc, stmtCtx, clauseOps); |
1184 | } |
1185 | } |
1186 | |
1187 | static void genTaskClauses(Fortran::lower::AbstractConverter &converter, |
1188 | Fortran::semantics::SemanticsContext &semaCtx, |
1189 | Fortran::lower::StatementContext &stmtCtx, |
1190 | const List<Clause> &clauses, mlir::Location loc, |
1191 | mlir::omp::TaskClauseOps &clauseOps) { |
1192 | ClauseProcessor cp(converter, semaCtx, clauses); |
1193 | cp.processAllocate(clauseOps); |
1194 | cp.processDefault(); |
1195 | cp.processDepend(clauseOps); |
1196 | cp.processFinal(stmtCtx, clauseOps); |
1197 | cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); |
1198 | cp.processMergeable(clauseOps); |
1199 | cp.processPriority(stmtCtx, clauseOps); |
1200 | cp.processUntied(clauseOps); |
1201 | // TODO Support delayed privatization. |
1202 | |
1203 | cp.processTODO<clause::Affinity, clause::Detach, clause::InReduction>( |
1204 | loc, llvm::omp::Directive::OMPD_task); |
1205 | } |
1206 | |
1207 | static void genTaskgroupClauses(Fortran::lower::AbstractConverter &converter, |
1208 | Fortran::semantics::SemanticsContext &semaCtx, |
1209 | const List<Clause> &clauses, mlir::Location loc, |
1210 | mlir::omp::TaskgroupClauseOps &clauseOps) { |
1211 | ClauseProcessor cp(converter, semaCtx, clauses); |
1212 | cp.processAllocate(clauseOps); |
1213 | cp.processTODO<clause::TaskReduction>(loc, |
1214 | llvm::omp::Directive::OMPD_taskgroup); |
1215 | } |
1216 | |
1217 | static void genTaskwaitClauses(Fortran::lower::AbstractConverter &converter, |
1218 | Fortran::semantics::SemanticsContext &semaCtx, |
1219 | const List<Clause> &clauses, mlir::Location loc, |
1220 | mlir::omp::TaskwaitClauseOps &clauseOps) { |
1221 | ClauseProcessor cp(converter, semaCtx, clauses); |
1222 | cp.processTODO<clause::Depend, clause::Nowait>( |
1223 | loc, llvm::omp::Directive::OMPD_taskwait); |
1224 | } |
1225 | |
1226 | static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, |
1227 | Fortran::semantics::SemanticsContext &semaCtx, |
1228 | Fortran::lower::StatementContext &stmtCtx, |
1229 | const List<Clause> &clauses, mlir::Location loc, |
1230 | mlir::omp::TeamsClauseOps &clauseOps) { |
1231 | ClauseProcessor cp(converter, semaCtx, clauses); |
1232 | cp.processAllocate(clauseOps); |
1233 | cp.processDefault(); |
1234 | cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); |
1235 | cp.processNumTeams(stmtCtx, clauseOps); |
1236 | cp.processThreadLimit(stmtCtx, clauseOps); |
1237 | // TODO Support delayed privatization. |
1238 | |
1239 | cp.processTODO<clause::Reduction>(loc, llvm::omp::Directive::OMPD_teams); |
1240 | } |
1241 | |
1242 | static void genWsloopClauses( |
1243 | Fortran::lower::AbstractConverter &converter, |
1244 | Fortran::semantics::SemanticsContext &semaCtx, |
1245 | Fortran::lower::StatementContext &stmtCtx, const List<Clause> &clauses, |
1246 | mlir::Location loc, mlir::omp::WsloopClauseOps &clauseOps, |
1247 | llvm::SmallVectorImpl<mlir::Type> &reductionTypes, |
1248 | llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &reductionSyms) { |
1249 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1250 | ClauseProcessor cp(converter, semaCtx, clauses); |
1251 | cp.processNowait(clauseOps); |
1252 | cp.processOrdered(clauseOps); |
1253 | cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); |
1254 | cp.processSchedule(stmtCtx, clauseOps); |
1255 | // TODO Support delayed privatization. |
1256 | |
1257 | if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) |
1258 | clauseOps.reductionByRefAttr = firOpBuilder.getUnitAttr(); |
1259 | |
1260 | cp.processTODO<clause::Allocate, clause::Linear, clause::Order>( |
1261 | loc, llvm::omp::Directive::OMPD_do); |
1262 | } |
1263 | |
1264 | //===----------------------------------------------------------------------===// |
1265 | // Code generation functions for leaf constructs |
1266 | //===----------------------------------------------------------------------===// |
1267 | |
1268 | static mlir::omp::BarrierOp |
1269 | genBarrierOp(Fortran::lower::AbstractConverter &converter, |
1270 | Fortran::semantics::SemanticsContext &semaCtx, |
1271 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc) { |
1272 | return converter.getFirOpBuilder().create<mlir::omp::BarrierOp>(loc); |
1273 | } |
1274 | |
1275 | static mlir::omp::CriticalOp |
1276 | genCriticalOp(Fortran::lower::AbstractConverter &converter, |
1277 | Fortran::semantics::SemanticsContext &semaCtx, |
1278 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1279 | mlir::Location loc, const List<Clause> &clauses, |
1280 | const std::optional<Fortran::parser::Name> &name) { |
1281 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1282 | mlir::FlatSymbolRefAttr nameAttr; |
1283 | |
1284 | if (name) { |
1285 | std::string nameStr = name->ToString(); |
1286 | mlir::ModuleOp mod = firOpBuilder.getModule(); |
1287 | auto global = mod.lookupSymbol<mlir::omp::CriticalDeclareOp>(nameStr); |
1288 | if (!global) { |
1289 | mlir::omp::CriticalClauseOps clauseOps; |
1290 | genCriticalDeclareClauses(converter, semaCtx, clauses, loc, clauseOps, |
1291 | nameStr); |
1292 | |
1293 | mlir::OpBuilder modBuilder(mod.getBodyRegion()); |
1294 | global = modBuilder.create<mlir::omp::CriticalDeclareOp>(loc, clauseOps); |
1295 | } |
1296 | nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), |
1297 | global.getSymName()); |
1298 | } |
1299 | |
1300 | return genOpWithBody<mlir::omp::CriticalOp>( |
1301 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1302 | llvm::omp::Directive::OMPD_critical) |
1303 | .setGenNested(genNested), |
1304 | nameAttr); |
1305 | } |
1306 | |
1307 | static mlir::omp::DistributeOp |
1308 | genDistributeOp(Fortran::lower::AbstractConverter &converter, |
1309 | Fortran::semantics::SemanticsContext &semaCtx, |
1310 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1311 | mlir::Location loc, const List<Clause> &clauses) { |
1312 | TODO(loc, "Distribute construct" ); |
1313 | return nullptr; |
1314 | } |
1315 | |
1316 | static mlir::omp::FlushOp |
1317 | genFlushOp(Fortran::lower::AbstractConverter &converter, |
1318 | Fortran::semantics::SemanticsContext &semaCtx, |
1319 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1320 | const ObjectList &objects, const List<Clause> &clauses) { |
1321 | llvm::SmallVector<mlir::Value> operandRange; |
1322 | genFlushClauses(converter, semaCtx, objects, clauses, loc, operandRange); |
1323 | |
1324 | return converter.getFirOpBuilder().create<mlir::omp::FlushOp>( |
1325 | converter.getCurrentLocation(), operandRange); |
1326 | } |
1327 | |
1328 | static mlir::omp::MasterOp |
1329 | genMasterOp(Fortran::lower::AbstractConverter &converter, |
1330 | Fortran::semantics::SemanticsContext &semaCtx, |
1331 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1332 | mlir::Location loc) { |
1333 | return genOpWithBody<mlir::omp::MasterOp>( |
1334 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1335 | llvm::omp::Directive::OMPD_master) |
1336 | .setGenNested(genNested)); |
1337 | } |
1338 | |
1339 | static mlir::omp::OrderedOp |
1340 | genOrderedOp(Fortran::lower::AbstractConverter &converter, |
1341 | Fortran::semantics::SemanticsContext &semaCtx, |
1342 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1343 | const List<Clause> &clauses) { |
1344 | TODO(loc, "OMPD_ordered" ); |
1345 | return nullptr; |
1346 | } |
1347 | |
1348 | static mlir::omp::OrderedRegionOp |
1349 | genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, |
1350 | Fortran::semantics::SemanticsContext &semaCtx, |
1351 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1352 | mlir::Location loc, const List<Clause> &clauses) { |
1353 | mlir::omp::OrderedRegionClauseOps clauseOps; |
1354 | genOrderedRegionClauses(converter, semaCtx, clauses, loc, clauseOps); |
1355 | |
1356 | return genOpWithBody<mlir::omp::OrderedRegionOp>( |
1357 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1358 | llvm::omp::Directive::OMPD_ordered) |
1359 | .setGenNested(genNested), |
1360 | clauseOps); |
1361 | } |
1362 | |
1363 | static mlir::omp::ParallelOp |
1364 | genParallelOp(Fortran::lower::AbstractConverter &converter, |
1365 | Fortran::lower::SymMap &symTable, |
1366 | Fortran::semantics::SemanticsContext &semaCtx, |
1367 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1368 | mlir::Location loc, const List<Clause> &clauses, |
1369 | bool outerCombined = false) { |
1370 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1371 | Fortran::lower::StatementContext stmtCtx; |
1372 | mlir::omp::ParallelClauseOps clauseOps; |
1373 | llvm::SmallVector<const Fortran::semantics::Symbol *> privateSyms; |
1374 | llvm::SmallVector<mlir::Type> reductionTypes; |
1375 | llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSyms; |
1376 | genParallelClauses(converter, semaCtx, stmtCtx, clauses, loc, |
1377 | /*processReduction=*/!outerCombined, clauseOps, |
1378 | reductionTypes, reductionSyms); |
1379 | |
1380 | auto reductionCallback = [&](mlir::Operation *op) { |
1381 | genReductionVars(op, converter, loc, reductionSyms, reductionTypes); |
1382 | return reductionSyms; |
1383 | }; |
1384 | |
1385 | OpWithBodyGenInfo genInfo = |
1386 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1387 | llvm::omp::Directive::OMPD_parallel) |
1388 | .setGenNested(genNested) |
1389 | .setOuterCombined(outerCombined) |
1390 | .setClauses(&clauses) |
1391 | .setReductions(&reductionSyms, &reductionTypes) |
1392 | .setGenRegionEntryCb(reductionCallback); |
1393 | |
1394 | if (!enableDelayedPrivatization) |
1395 | return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps); |
1396 | |
1397 | bool privatize = !outerCombined; |
1398 | DataSharingProcessor dsp(converter, semaCtx, clauses, eval, |
1399 | /*useDelayedPrivatization=*/true, &symTable); |
1400 | |
1401 | if (privatize) |
1402 | dsp.processStep1(clauseOps: &clauseOps, privateSyms: &privateSyms); |
1403 | |
1404 | auto genRegionEntryCB = [&](mlir::Operation *op) { |
1405 | auto parallelOp = llvm::cast<mlir::omp::ParallelOp>(op); |
1406 | |
1407 | llvm::SmallVector<mlir::Location> reductionLocs( |
1408 | clauseOps.reductionVars.size(), loc); |
1409 | |
1410 | mlir::OperandRange privateVars = parallelOp.getPrivateVars(); |
1411 | mlir::Region ®ion = parallelOp.getRegion(); |
1412 | |
1413 | llvm::SmallVector<mlir::Type> privateVarTypes = reductionTypes; |
1414 | privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); |
1415 | llvm::transform(privateVars, std::back_inserter(privateVarTypes), |
1416 | [](mlir::Value v) { return v.getType(); }); |
1417 | |
1418 | llvm::SmallVector<mlir::Location> privateVarLocs = reductionLocs; |
1419 | privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); |
1420 | llvm::transform(privateVars, std::back_inserter(privateVarLocs), |
1421 | [](mlir::Value v) { return v.getLoc(); }); |
1422 | |
1423 | firOpBuilder.createBlock(®ion, /*insertPt=*/{}, privateVarTypes, |
1424 | privateVarLocs); |
1425 | |
1426 | llvm::SmallVector<const Fortran::semantics::Symbol *> allSymbols = |
1427 | reductionSyms; |
1428 | allSymbols.append(RHS: privateSyms); |
1429 | for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { |
1430 | converter.bindSymbol(*arg, prv); |
1431 | } |
1432 | |
1433 | return allSymbols; |
1434 | }; |
1435 | |
1436 | // TODO Merge with the reduction CB. |
1437 | genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); |
1438 | return genOpWithBody<mlir::omp::ParallelOp>(genInfo, clauseOps); |
1439 | } |
1440 | |
1441 | static mlir::omp::SectionOp |
1442 | genSectionOp(Fortran::lower::AbstractConverter &converter, |
1443 | Fortran::semantics::SemanticsContext &semaCtx, |
1444 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1445 | mlir::Location loc, const List<Clause> &clauses) { |
1446 | // Currently only private/firstprivate clause is handled, and |
1447 | // all privatization is done within `omp.section` operations. |
1448 | return genOpWithBody<mlir::omp::SectionOp>( |
1449 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1450 | llvm::omp::Directive::OMPD_section) |
1451 | .setGenNested(genNested) |
1452 | .setClauses(&clauses)); |
1453 | } |
1454 | |
1455 | static mlir::omp::SectionsOp |
1456 | genSectionsOp(Fortran::lower::AbstractConverter &converter, |
1457 | Fortran::semantics::SemanticsContext &semaCtx, |
1458 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1459 | const mlir::omp::SectionsClauseOps &clauseOps) { |
1460 | return genOpWithBody<mlir::omp::SectionsOp>( |
1461 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1462 | llvm::omp::Directive::OMPD_sections) |
1463 | .setGenNested(false), |
1464 | clauseOps); |
1465 | } |
1466 | |
1467 | static mlir::omp::SimdOp |
1468 | genSimdOp(Fortran::lower::AbstractConverter &converter, |
1469 | Fortran::semantics::SemanticsContext &semaCtx, |
1470 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1471 | const List<Clause> &clauses) { |
1472 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1473 | DataSharingProcessor dsp(converter, semaCtx, clauses, eval); |
1474 | dsp.processStep1(); |
1475 | |
1476 | Fortran::lower::StatementContext stmtCtx; |
1477 | mlir::omp::LoopNestClauseOps loopClauseOps; |
1478 | mlir::omp::SimdClauseOps simdClauseOps; |
1479 | llvm::SmallVector<const Fortran::semantics::Symbol *> iv; |
1480 | genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopClauseOps, iv); |
1481 | genSimdClauses(converter, semaCtx, clauses, loc, simdClauseOps); |
1482 | |
1483 | // Create omp.simd wrapper. |
1484 | auto simdOp = firOpBuilder.create<mlir::omp::SimdOp>(loc, simdClauseOps); |
1485 | |
1486 | // TODO: Add reduction-related arguments to the wrapper's entry block. |
1487 | firOpBuilder.createBlock(&simdOp.getRegion()); |
1488 | firOpBuilder.setInsertionPoint( |
1489 | Fortran::lower::genOpenMPTerminator(firOpBuilder, simdOp, loc)); |
1490 | |
1491 | // Create nested omp.loop_nest and fill body with loop contents. |
1492 | auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps); |
1493 | |
1494 | auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses)); |
1495 | |
1496 | auto ivCallback = [&](mlir::Operation *op) { |
1497 | genLoopVars(op, converter, loc, iv); |
1498 | return iv; |
1499 | }; |
1500 | |
1501 | createBodyOfOp(*loopOp, |
1502 | OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval, |
1503 | llvm::omp::Directive::OMPD_simd) |
1504 | .setClauses(&clauses) |
1505 | .setDataSharingProcessor(&dsp) |
1506 | .setGenRegionEntryCb(ivCallback)); |
1507 | |
1508 | return simdOp; |
1509 | } |
1510 | |
1511 | static mlir::omp::SingleOp |
1512 | genSingleOp(Fortran::lower::AbstractConverter &converter, |
1513 | Fortran::semantics::SemanticsContext &semaCtx, |
1514 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1515 | mlir::Location loc, const List<Clause> &clauses) { |
1516 | mlir::omp::SingleClauseOps clauseOps; |
1517 | genSingleClauses(converter, semaCtx, clauses, loc, clauseOps); |
1518 | |
1519 | return genOpWithBody<mlir::omp::SingleOp>( |
1520 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1521 | llvm::omp::Directive::OMPD_single) |
1522 | .setGenNested(genNested) |
1523 | .setClauses(&clauses), |
1524 | clauseOps); |
1525 | } |
1526 | |
1527 | static mlir::omp::TargetOp |
1528 | genTargetOp(Fortran::lower::AbstractConverter &converter, |
1529 | Fortran::semantics::SemanticsContext &semaCtx, |
1530 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1531 | mlir::Location loc, const List<Clause> &clauses, |
1532 | bool outerCombined = false) { |
1533 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1534 | Fortran::lower::StatementContext stmtCtx; |
1535 | |
1536 | bool processHostOnlyClauses = |
1537 | !llvm::cast<mlir::omp::OffloadModuleInterface>(*converter.getModuleOp()) |
1538 | .getIsTargetDevice(); |
1539 | |
1540 | mlir::omp::TargetClauseOps clauseOps; |
1541 | llvm::SmallVector<const Fortran::semantics::Symbol *> mapSyms, devicePtrSyms, |
1542 | deviceAddrSyms; |
1543 | llvm::SmallVector<mlir::Location> mapLocs, devicePtrLocs, deviceAddrLocs; |
1544 | llvm::SmallVector<mlir::Type> mapTypes, devicePtrTypes, deviceAddrTypes; |
1545 | genTargetClauses(converter, semaCtx, stmtCtx, clauses, loc, |
1546 | processHostOnlyClauses, /*processReduction=*/outerCombined, |
1547 | clauseOps, mapSyms, mapLocs, mapTypes, deviceAddrSyms, |
1548 | deviceAddrLocs, deviceAddrTypes, devicePtrSyms, |
1549 | devicePtrLocs, devicePtrTypes); |
1550 | |
1551 | // 5.8.1 Implicit Data-Mapping Attribute Rules |
1552 | // The following code follows the implicit data-mapping rules to map all the |
1553 | // symbols used inside the region that have not been explicitly mapped using |
1554 | // the map clause. |
1555 | auto captureImplicitMap = [&](const Fortran::semantics::Symbol &sym) { |
1556 | if (llvm::find(Range&: mapSyms, Val: &sym) == mapSyms.end()) { |
1557 | mlir::Value baseOp = converter.getSymbolAddress(sym); |
1558 | if (!baseOp) |
1559 | if (const auto *details = sym.template detailsIf< |
1560 | Fortran::semantics::HostAssocDetails>()) { |
1561 | baseOp = converter.getSymbolAddress(details->symbol()); |
1562 | converter.copySymbolBinding(details->symbol(), sym); |
1563 | } |
1564 | |
1565 | if (baseOp) { |
1566 | llvm::SmallVector<mlir::Value> bounds; |
1567 | std::stringstream name; |
1568 | fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); |
1569 | name << sym.name().ToString(); |
1570 | |
1571 | Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( |
1572 | converter, firOpBuilder, sym, converter.getCurrentLocation()); |
1573 | if (fir::unwrapRefType(info.addr.getType()).isa<fir::BaseBoxType>()) |
1574 | bounds = |
1575 | Fortran::lower::genBoundsOpsFromBox<mlir::omp::MapBoundsOp, |
1576 | mlir::omp::MapBoundsType>( |
1577 | firOpBuilder, converter.getCurrentLocation(), converter, |
1578 | dataExv, info); |
1579 | if (fir::unwrapRefType(info.addr.getType()).isa<fir::SequenceType>()) { |
1580 | bool dataExvIsAssumedSize = |
1581 | Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); |
1582 | bounds = Fortran::lower::genBaseBoundsOps<mlir::omp::MapBoundsOp, |
1583 | mlir::omp::MapBoundsType>( |
1584 | firOpBuilder, converter.getCurrentLocation(), converter, dataExv, |
1585 | dataExvIsAssumedSize); |
1586 | } |
1587 | |
1588 | llvm::omp::OpenMPOffloadMappingFlags mapFlag = |
1589 | llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT; |
1590 | mlir::omp::VariableCaptureKind captureKind = |
1591 | mlir::omp::VariableCaptureKind::ByRef; |
1592 | |
1593 | mlir::Type eleType = baseOp.getType(); |
1594 | if (auto refType = baseOp.getType().dyn_cast<fir::ReferenceType>()) |
1595 | eleType = refType.getElementType(); |
1596 | |
1597 | // If a variable is specified in declare target link and if device |
1598 | // type is not specified as `nohost`, it needs to be mapped tofrom |
1599 | mlir::ModuleOp mod = firOpBuilder.getModule(); |
1600 | mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); |
1601 | auto declareTargetOp = |
1602 | llvm::dyn_cast_if_present<mlir::omp::DeclareTargetInterface>(op); |
1603 | if (declareTargetOp && declareTargetOp.isDeclareTarget()) { |
1604 | if (declareTargetOp.getDeclareTargetCaptureClause() == |
1605 | mlir::omp::DeclareTargetCaptureClause::link && |
1606 | declareTargetOp.getDeclareTargetDeviceType() != |
1607 | mlir::omp::DeclareTargetDeviceType::nohost) { |
1608 | mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; |
1609 | mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; |
1610 | } |
1611 | } else if (fir::isa_trivial(eleType) || fir::isa_char(eleType)) { |
1612 | captureKind = mlir::omp::VariableCaptureKind::ByCopy; |
1613 | } else if (!fir::isa_builtin_cptr_type(eleType)) { |
1614 | mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO; |
1615 | mapFlag |= llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM; |
1616 | } |
1617 | |
1618 | mlir::Value mapOp = createMapInfoOp( |
1619 | firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), |
1620 | bounds, {}, |
1621 | static_cast< |
1622 | std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>( |
1623 | mapFlag), |
1624 | captureKind, baseOp.getType()); |
1625 | |
1626 | clauseOps.mapVars.push_back(mapOp); |
1627 | mapSyms.push_back(Elt: &sym); |
1628 | mapLocs.push_back(baseOp.getLoc()); |
1629 | mapTypes.push_back(baseOp.getType()); |
1630 | } |
1631 | } |
1632 | }; |
1633 | Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); |
1634 | |
1635 | auto targetOp = firOpBuilder.create<mlir::omp::TargetOp>(loc, clauseOps); |
1636 | genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms, |
1637 | mapLocs, mapTypes, loc); |
1638 | return targetOp; |
1639 | } |
1640 | |
1641 | static mlir::omp::TargetDataOp |
1642 | genTargetDataOp(Fortran::lower::AbstractConverter &converter, |
1643 | Fortran::semantics::SemanticsContext &semaCtx, |
1644 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1645 | mlir::Location loc, const List<Clause> &clauses) { |
1646 | Fortran::lower::StatementContext stmtCtx; |
1647 | mlir::omp::TargetDataClauseOps clauseOps; |
1648 | llvm::SmallVector<mlir::Type> useDeviceTypes; |
1649 | llvm::SmallVector<mlir::Location> useDeviceLocs; |
1650 | llvm::SmallVector<const Fortran::semantics::Symbol *> useDeviceSyms; |
1651 | genTargetDataClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps, |
1652 | useDeviceTypes, useDeviceLocs, useDeviceSyms); |
1653 | |
1654 | auto targetDataOp = |
1655 | converter.getFirOpBuilder().create<mlir::omp::TargetDataOp>(loc, |
1656 | clauseOps); |
1657 | genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, targetDataOp, |
1658 | useDeviceTypes, useDeviceLocs, useDeviceSyms, loc); |
1659 | return targetDataOp; |
1660 | } |
1661 | |
1662 | template <typename OpTy> |
1663 | static OpTy |
1664 | genTargetEnterExitUpdateDataOp(Fortran::lower::AbstractConverter &converter, |
1665 | Fortran::semantics::SemanticsContext &semaCtx, |
1666 | mlir::Location loc, |
1667 | const List<Clause> &clauses) { |
1668 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1669 | Fortran::lower::StatementContext stmtCtx; |
1670 | |
1671 | // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. |
1672 | [[maybe_unused]] llvm::omp::Directive directive; |
1673 | if constexpr (std::is_same_v<OpTy, mlir::omp::TargetEnterDataOp>) { |
1674 | directive = llvm::omp::Directive::OMPD_target_enter_data; |
1675 | } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetExitDataOp>) { |
1676 | directive = llvm::omp::Directive::OMPD_target_exit_data; |
1677 | } else if constexpr (std::is_same_v<OpTy, mlir::omp::TargetUpdateOp>) { |
1678 | directive = llvm::omp::Directive::OMPD_target_update; |
1679 | } else { |
1680 | llvm_unreachable("Unexpected TARGET DATA construct" ); |
1681 | } |
1682 | |
1683 | mlir::omp::TargetEnterExitUpdateDataClauseOps clauseOps; |
1684 | genTargetEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauses, loc, |
1685 | directive, clauseOps); |
1686 | |
1687 | return firOpBuilder.create<OpTy>(loc, clauseOps); |
1688 | } |
1689 | |
1690 | static mlir::omp::TaskOp |
1691 | genTaskOp(Fortran::lower::AbstractConverter &converter, |
1692 | Fortran::semantics::SemanticsContext &semaCtx, |
1693 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1694 | mlir::Location loc, const List<Clause> &clauses) { |
1695 | Fortran::lower::StatementContext stmtCtx; |
1696 | mlir::omp::TaskClauseOps clauseOps; |
1697 | genTaskClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps); |
1698 | |
1699 | return genOpWithBody<mlir::omp::TaskOp>( |
1700 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1701 | llvm::omp::Directive::OMPD_task) |
1702 | .setGenNested(genNested) |
1703 | .setClauses(&clauses), |
1704 | clauseOps); |
1705 | } |
1706 | |
1707 | static mlir::omp::TaskgroupOp |
1708 | genTaskgroupOp(Fortran::lower::AbstractConverter &converter, |
1709 | Fortran::semantics::SemanticsContext &semaCtx, |
1710 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1711 | mlir::Location loc, const List<Clause> &clauses) { |
1712 | mlir::omp::TaskgroupClauseOps clauseOps; |
1713 | genTaskgroupClauses(converter, semaCtx, clauses, loc, clauseOps); |
1714 | |
1715 | return genOpWithBody<mlir::omp::TaskgroupOp>( |
1716 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1717 | llvm::omp::Directive::OMPD_taskgroup) |
1718 | .setGenNested(genNested) |
1719 | .setClauses(&clauses), |
1720 | clauseOps); |
1721 | } |
1722 | |
1723 | static mlir::omp::TaskloopOp |
1724 | genTaskloopOp(Fortran::lower::AbstractConverter &converter, |
1725 | Fortran::semantics::SemanticsContext &semaCtx, |
1726 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1727 | const List<Clause> &clauses) { |
1728 | TODO(loc, "Taskloop construct" ); |
1729 | } |
1730 | |
1731 | static mlir::omp::TaskwaitOp |
1732 | genTaskwaitOp(Fortran::lower::AbstractConverter &converter, |
1733 | Fortran::semantics::SemanticsContext &semaCtx, |
1734 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1735 | const List<Clause> &clauses) { |
1736 | mlir::omp::TaskwaitClauseOps clauseOps; |
1737 | genTaskwaitClauses(converter, semaCtx, clauses, loc, clauseOps); |
1738 | return converter.getFirOpBuilder().create<mlir::omp::TaskwaitOp>(loc, |
1739 | clauseOps); |
1740 | } |
1741 | |
1742 | static mlir::omp::TaskyieldOp |
1743 | genTaskyieldOp(Fortran::lower::AbstractConverter &converter, |
1744 | Fortran::semantics::SemanticsContext &semaCtx, |
1745 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc) { |
1746 | return converter.getFirOpBuilder().create<mlir::omp::TaskyieldOp>(loc); |
1747 | } |
1748 | |
1749 | static mlir::omp::TeamsOp |
1750 | genTeamsOp(Fortran::lower::AbstractConverter &converter, |
1751 | Fortran::semantics::SemanticsContext &semaCtx, |
1752 | Fortran::lower::pft::Evaluation &eval, bool genNested, |
1753 | mlir::Location loc, const List<Clause> &clauses, |
1754 | bool outerCombined = false) { |
1755 | Fortran::lower::StatementContext stmtCtx; |
1756 | mlir::omp::TeamsClauseOps clauseOps; |
1757 | genTeamsClauses(converter, semaCtx, stmtCtx, clauses, loc, clauseOps); |
1758 | |
1759 | return genOpWithBody<mlir::omp::TeamsOp>( |
1760 | OpWithBodyGenInfo(converter, semaCtx, loc, eval, |
1761 | llvm::omp::Directive::OMPD_teams) |
1762 | .setGenNested(genNested) |
1763 | .setOuterCombined(outerCombined) |
1764 | .setClauses(&clauses), |
1765 | clauseOps); |
1766 | } |
1767 | |
1768 | static mlir::omp::WsloopOp |
1769 | genWsloopOp(Fortran::lower::AbstractConverter &converter, |
1770 | Fortran::semantics::SemanticsContext &semaCtx, |
1771 | Fortran::lower::pft::Evaluation &eval, mlir::Location loc, |
1772 | const List<Clause> &clauses) { |
1773 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
1774 | DataSharingProcessor dsp(converter, semaCtx, clauses, eval); |
1775 | dsp.processStep1(); |
1776 | |
1777 | Fortran::lower::StatementContext stmtCtx; |
1778 | mlir::omp::LoopNestClauseOps loopClauseOps; |
1779 | mlir::omp::WsloopClauseOps wsClauseOps; |
1780 | llvm::SmallVector<const Fortran::semantics::Symbol *> iv; |
1781 | llvm::SmallVector<mlir::Type> reductionTypes; |
1782 | llvm::SmallVector<const Fortran::semantics::Symbol *> reductionSyms; |
1783 | genLoopNestClauses(converter, semaCtx, eval, clauses, loc, loopClauseOps, iv); |
1784 | genWsloopClauses(converter, semaCtx, stmtCtx, clauses, loc, wsClauseOps, |
1785 | reductionTypes, reductionSyms); |
1786 | |
1787 | // Create omp.wsloop wrapper and populate entry block arguments with reduction |
1788 | // variables. |
1789 | auto wsloopOp = firOpBuilder.create<mlir::omp::WsloopOp>(loc, wsClauseOps); |
1790 | llvm::SmallVector<mlir::Location> reductionLocs(reductionSyms.size(), loc); |
1791 | mlir::Block *wsloopEntryBlock = firOpBuilder.createBlock( |
1792 | &wsloopOp.getRegion(), {}, reductionTypes, reductionLocs); |
1793 | firOpBuilder.setInsertionPoint( |
1794 | Fortran::lower::genOpenMPTerminator(firOpBuilder, wsloopOp, loc)); |
1795 | |
1796 | // Create nested omp.loop_nest and fill body with loop contents. |
1797 | auto loopOp = firOpBuilder.create<mlir::omp::LoopNestOp>(loc, loopClauseOps); |
1798 | |
1799 | auto *nestedEval = getCollapsedLoopEval(eval, getCollapseValue(clauses)); |
1800 | |
1801 | auto ivCallback = [&](mlir::Operation *op) { |
1802 | genLoopVars(op, converter, loc, iv, reductionSyms, |
1803 | wsloopEntryBlock->getArguments()); |
1804 | return iv; |
1805 | }; |
1806 | |
1807 | createBodyOfOp(*loopOp, |
1808 | OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval, |
1809 | llvm::omp::Directive::OMPD_do) |
1810 | .setClauses(&clauses) |
1811 | .setDataSharingProcessor(&dsp) |
1812 | .setReductions(&reductionSyms, &reductionTypes) |
1813 | .setGenRegionEntryCb(ivCallback)); |
1814 | return wsloopOp; |
1815 | } |
1816 | |
1817 | //===----------------------------------------------------------------------===// |
1818 | // Code generation functions for composite constructs |
1819 | //===----------------------------------------------------------------------===// |
1820 | |
1821 | static void |
1822 | genCompositeDistributeParallelDo(Fortran::lower::AbstractConverter &converter, |
1823 | Fortran::semantics::SemanticsContext &semaCtx, |
1824 | Fortran::lower::pft::Evaluation &eval, |
1825 | const List<Clause> &clauses, |
1826 | mlir::Location loc) { |
1827 | TODO(loc, "Composite DISTRIBUTE PARALLEL DO" ); |
1828 | } |
1829 | |
1830 | static void genCompositeDistributeParallelDoSimd( |
1831 | Fortran::lower::AbstractConverter &converter, |
1832 | Fortran::semantics::SemanticsContext &semaCtx, |
1833 | Fortran::lower::pft::Evaluation &eval, const List<Clause> &clauses, |
1834 | mlir::Location loc) { |
1835 | TODO(loc, "Composite DISTRIBUTE PARALLEL DO SIMD" ); |
1836 | } |
1837 | |
1838 | static void |
1839 | genCompositeDistributeSimd(Fortran::lower::AbstractConverter &converter, |
1840 | Fortran::semantics::SemanticsContext &semaCtx, |
1841 | Fortran::lower::pft::Evaluation &eval, |
1842 | const List<Clause> &clauses, mlir::Location loc) { |
1843 | TODO(loc, "Composite DISTRIBUTE SIMD" ); |
1844 | } |
1845 | |
1846 | static void genCompositeDoSimd(Fortran::lower::AbstractConverter &converter, |
1847 | Fortran::semantics::SemanticsContext &semaCtx, |
1848 | Fortran::lower::pft::Evaluation &eval, |
1849 | const List<Clause> &clauses, |
1850 | mlir::Location loc) { |
1851 | ClauseProcessor cp(converter, semaCtx, clauses); |
1852 | cp.processTODO<clause::Aligned, clause::Allocate, clause::Linear, |
1853 | clause::Order, clause::Safelen, clause::Simdlen>( |
1854 | loc, llvm::omp::OMPD_do_simd); |
1855 | // TODO: Add support for vectorization - add vectorization hints inside loop |
1856 | // body. |
1857 | // OpenMP standard does not specify the length of vector instructions. |
1858 | // Currently we safely assume that for !$omp do simd pragma the SIMD length |
1859 | // is equal to 1 (i.e. we generate standard workshare loop). |
1860 | // When support for vectorization is enabled, then we need to add handling of |
1861 | // if clause. Currently if clause can be skipped because we always assume |
1862 | // SIMD length = 1. |
1863 | genWsloopOp(converter, semaCtx, eval, loc, clauses); |
1864 | } |
1865 | |
1866 | static void |
1867 | genCompositeTaskloopSimd(Fortran::lower::AbstractConverter &converter, |
1868 | Fortran::semantics::SemanticsContext &semaCtx, |
1869 | Fortran::lower::pft::Evaluation &eval, |
1870 | const List<Clause> &clauses, mlir::Location loc) { |
1871 | TODO(loc, "Composite TASKLOOP SIMD" ); |
1872 | } |
1873 | |
1874 | //===----------------------------------------------------------------------===// |
1875 | // OpenMPDeclarativeConstruct visitors |
1876 | //===----------------------------------------------------------------------===// |
1877 | |
1878 | static void |
1879 | genOMP(Fortran::lower::AbstractConverter &converter, |
1880 | Fortran::lower::SymMap &symTable, |
1881 | Fortran::semantics::SemanticsContext &semaCtx, |
1882 | Fortran::lower::pft::Evaluation &eval, |
1883 | const Fortran::parser::OpenMPDeclarativeAllocate &declarativeAllocate) { |
1884 | TODO(converter.getCurrentLocation(), "OpenMPDeclarativeAllocate" ); |
1885 | } |
1886 | |
1887 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
1888 | Fortran::lower::SymMap &symTable, |
1889 | Fortran::semantics::SemanticsContext &semaCtx, |
1890 | Fortran::lower::pft::Evaluation &eval, |
1891 | const Fortran::parser::OpenMPDeclareReductionConstruct |
1892 | &declareReductionConstruct) { |
1893 | TODO(converter.getCurrentLocation(), "OpenMPDeclareReductionConstruct" ); |
1894 | } |
1895 | |
1896 | static void genOMP( |
1897 | Fortran::lower::AbstractConverter &converter, |
1898 | Fortran::lower::SymMap &symTable, |
1899 | Fortran::semantics::SemanticsContext &semaCtx, |
1900 | Fortran::lower::pft::Evaluation &eval, |
1901 | const Fortran::parser::OpenMPDeclareSimdConstruct &declareSimdConstruct) { |
1902 | TODO(converter.getCurrentLocation(), "OpenMPDeclareSimdConstruct" ); |
1903 | } |
1904 | |
1905 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
1906 | Fortran::lower::SymMap &symTable, |
1907 | Fortran::semantics::SemanticsContext &semaCtx, |
1908 | Fortran::lower::pft::Evaluation &eval, |
1909 | const Fortran::parser::OpenMPDeclareTargetConstruct |
1910 | &declareTargetConstruct) { |
1911 | mlir::omp::DeclareTargetClauseOps clauseOps; |
1912 | llvm::SmallVector<DeclareTargetCapturePair> symbolAndClause; |
1913 | mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); |
1914 | getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, |
1915 | clauseOps, symbolAndClause); |
1916 | |
1917 | for (const DeclareTargetCapturePair &symClause : symbolAndClause) { |
1918 | mlir::Operation *op = mod.lookupSymbol(converter.mangleName( |
1919 | std::get<const Fortran::semantics::Symbol &>(symClause))); |
1920 | |
1921 | // Some symbols are deferred until later in the module, these are handled |
1922 | // upon finalization of the module for OpenMP inside of Bridge, so we simply |
1923 | // skip for now. |
1924 | if (!op) |
1925 | continue; |
1926 | |
1927 | markDeclareTarget( |
1928 | op, converter, |
1929 | std::get<mlir::omp::DeclareTargetCaptureClause>(symClause), |
1930 | clauseOps.deviceType); |
1931 | } |
1932 | } |
1933 | |
1934 | static void |
1935 | genOMP(Fortran::lower::AbstractConverter &converter, |
1936 | Fortran::lower::SymMap &symTable, |
1937 | Fortran::semantics::SemanticsContext &semaCtx, |
1938 | Fortran::lower::pft::Evaluation &eval, |
1939 | const Fortran::parser::OpenMPRequiresConstruct &requiresConstruct) { |
1940 | // Requires directives are gathered and processed in semantics and |
1941 | // then combined in the lowering bridge before triggering codegen |
1942 | // just once. Hence, there is no need to lower each individual |
1943 | // occurrence here. |
1944 | } |
1945 | |
1946 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
1947 | Fortran::lower::SymMap &symTable, |
1948 | Fortran::semantics::SemanticsContext &semaCtx, |
1949 | Fortran::lower::pft::Evaluation &eval, |
1950 | const Fortran::parser::OpenMPThreadprivate &threadprivate) { |
1951 | // The directive is lowered when instantiating the variable to |
1952 | // support the case of threadprivate variable declared in module. |
1953 | } |
1954 | |
1955 | static void |
1956 | genOMP(Fortran::lower::AbstractConverter &converter, |
1957 | Fortran::lower::SymMap &symTable, |
1958 | Fortran::semantics::SemanticsContext &semaCtx, |
1959 | Fortran::lower::pft::Evaluation &eval, |
1960 | const Fortran::parser::OpenMPDeclarativeConstruct &ompDeclConstruct) { |
1961 | std::visit( |
1962 | [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, |
1963 | ompDeclConstruct.u); |
1964 | } |
1965 | |
1966 | //===----------------------------------------------------------------------===// |
1967 | // OpenMPStandaloneConstruct visitors |
1968 | //===----------------------------------------------------------------------===// |
1969 | |
1970 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
1971 | Fortran::lower::SymMap &symTable, |
1972 | Fortran::semantics::SemanticsContext &semaCtx, |
1973 | Fortran::lower::pft::Evaluation &eval, |
1974 | const Fortran::parser::OpenMPSimpleStandaloneConstruct |
1975 | &simpleStandaloneConstruct) { |
1976 | const auto &directive = |
1977 | std::get<Fortran::parser::OmpSimpleStandaloneDirective>( |
1978 | simpleStandaloneConstruct.t); |
1979 | List<Clause> clauses = makeClauses( |
1980 | std::get<Fortran::parser::OmpClauseList>(simpleStandaloneConstruct.t), |
1981 | semaCtx); |
1982 | mlir::Location currentLocation = converter.genLocation(directive.source); |
1983 | |
1984 | switch (directive.v) { |
1985 | default: |
1986 | break; |
1987 | case llvm::omp::Directive::OMPD_barrier: |
1988 | genBarrierOp(converter, semaCtx, eval, currentLocation); |
1989 | break; |
1990 | case llvm::omp::Directive::OMPD_taskwait: |
1991 | genTaskwaitOp(converter, semaCtx, eval, currentLocation, clauses); |
1992 | break; |
1993 | case llvm::omp::Directive::OMPD_taskyield: |
1994 | genTaskyieldOp(converter, semaCtx, eval, currentLocation); |
1995 | break; |
1996 | case llvm::omp::Directive::OMPD_target_data: |
1997 | genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true, |
1998 | currentLocation, clauses); |
1999 | break; |
2000 | case llvm::omp::Directive::OMPD_target_enter_data: |
2001 | genTargetEnterExitUpdateDataOp<mlir::omp::TargetEnterDataOp>( |
2002 | converter, semaCtx, currentLocation, clauses); |
2003 | break; |
2004 | case llvm::omp::Directive::OMPD_target_exit_data: |
2005 | genTargetEnterExitUpdateDataOp<mlir::omp::TargetExitDataOp>( |
2006 | converter, semaCtx, currentLocation, clauses); |
2007 | break; |
2008 | case llvm::omp::Directive::OMPD_target_update: |
2009 | genTargetEnterExitUpdateDataOp<mlir::omp::TargetUpdateOp>( |
2010 | converter, semaCtx, currentLocation, clauses); |
2011 | break; |
2012 | case llvm::omp::Directive::OMPD_ordered: |
2013 | genOrderedOp(converter, semaCtx, eval, currentLocation, clauses); |
2014 | break; |
2015 | } |
2016 | } |
2017 | |
2018 | static void |
2019 | genOMP(Fortran::lower::AbstractConverter &converter, |
2020 | Fortran::lower::SymMap &symTable, |
2021 | Fortran::semantics::SemanticsContext &semaCtx, |
2022 | Fortran::lower::pft::Evaluation &eval, |
2023 | const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { |
2024 | const auto &verbatim = std::get<Fortran::parser::Verbatim>(flushConstruct.t); |
2025 | const auto &objectList = |
2026 | std::get<std::optional<Fortran::parser::OmpObjectList>>(flushConstruct.t); |
2027 | const auto &clauseList = |
2028 | std::get<std::optional<std::list<Fortran::parser::OmpMemoryOrderClause>>>( |
2029 | flushConstruct.t); |
2030 | ObjectList objects = |
2031 | objectList ? makeObjects(*objectList, semaCtx) : ObjectList{}; |
2032 | List<Clause> clauses = |
2033 | clauseList ? makeList(*clauseList, |
2034 | [&](auto &&s) { return makeClause(s.v, semaCtx); }) |
2035 | : List<Clause>{}; |
2036 | mlir::Location currentLocation = converter.genLocation(verbatim.source); |
2037 | genFlushOp(converter, semaCtx, eval, currentLocation, objects, clauses); |
2038 | } |
2039 | |
2040 | static void |
2041 | genOMP(Fortran::lower::AbstractConverter &converter, |
2042 | Fortran::lower::SymMap &symTable, |
2043 | Fortran::semantics::SemanticsContext &semaCtx, |
2044 | Fortran::lower::pft::Evaluation &eval, |
2045 | const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { |
2046 | TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct" ); |
2047 | } |
2048 | |
2049 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
2050 | Fortran::lower::SymMap &symTable, |
2051 | Fortran::semantics::SemanticsContext &semaCtx, |
2052 | Fortran::lower::pft::Evaluation &eval, |
2053 | const Fortran::parser::OpenMPCancellationPointConstruct |
2054 | &cancellationPointConstruct) { |
2055 | TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct" ); |
2056 | } |
2057 | |
2058 | static void |
2059 | genOMP(Fortran::lower::AbstractConverter &converter, |
2060 | Fortran::lower::SymMap &symTable, |
2061 | Fortran::semantics::SemanticsContext &semaCtx, |
2062 | Fortran::lower::pft::Evaluation &eval, |
2063 | const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { |
2064 | std::visit( |
2065 | [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, |
2066 | standaloneConstruct.u); |
2067 | } |
2068 | |
2069 | //===----------------------------------------------------------------------===// |
2070 | // OpenMPConstruct visitors |
2071 | //===----------------------------------------------------------------------===// |
2072 | |
2073 | static void |
2074 | genOMP(Fortran::lower::AbstractConverter &converter, |
2075 | Fortran::lower::SymMap &symTable, |
2076 | Fortran::semantics::SemanticsContext &semaCtx, |
2077 | Fortran::lower::pft::Evaluation &eval, |
2078 | const Fortran::parser::OpenMPAllocatorsConstruct &allocsConstruct) { |
2079 | TODO(converter.getCurrentLocation(), "OpenMPAllocatorsConstruct" ); |
2080 | } |
2081 | |
2082 | static void |
2083 | genOMP(Fortran::lower::AbstractConverter &converter, |
2084 | Fortran::lower::SymMap &symTable, |
2085 | Fortran::semantics::SemanticsContext &semaCtx, |
2086 | Fortran::lower::pft::Evaluation &eval, |
2087 | const Fortran::parser::OpenMPAtomicConstruct &atomicConstruct) { |
2088 | std::visit( |
2089 | Fortran::common::visitors{ |
2090 | [&](const Fortran::parser::OmpAtomicRead &atomicRead) { |
2091 | mlir::Location loc = converter.genLocation(atomicRead.source); |
2092 | Fortran::lower::genOmpAccAtomicRead< |
2093 | Fortran::parser::OmpAtomicRead, |
2094 | Fortran::parser::OmpAtomicClauseList>(converter, atomicRead, |
2095 | loc); |
2096 | }, |
2097 | [&](const Fortran::parser::OmpAtomicWrite &atomicWrite) { |
2098 | mlir::Location loc = converter.genLocation(atomicWrite.source); |
2099 | Fortran::lower::genOmpAccAtomicWrite< |
2100 | Fortran::parser::OmpAtomicWrite, |
2101 | Fortran::parser::OmpAtomicClauseList>(converter, atomicWrite, |
2102 | loc); |
2103 | }, |
2104 | [&](const Fortran::parser::OmpAtomic &atomicConstruct) { |
2105 | mlir::Location loc = converter.genLocation(atomicConstruct.source); |
2106 | Fortran::lower::genOmpAtomic<Fortran::parser::OmpAtomic, |
2107 | Fortran::parser::OmpAtomicClauseList>( |
2108 | converter, atomicConstruct, loc); |
2109 | }, |
2110 | [&](const Fortran::parser::OmpAtomicUpdate &atomicUpdate) { |
2111 | mlir::Location loc = converter.genLocation(atomicUpdate.source); |
2112 | Fortran::lower::genOmpAccAtomicUpdate< |
2113 | Fortran::parser::OmpAtomicUpdate, |
2114 | Fortran::parser::OmpAtomicClauseList>(converter, atomicUpdate, |
2115 | loc); |
2116 | }, |
2117 | [&](const Fortran::parser::OmpAtomicCapture &atomicCapture) { |
2118 | mlir::Location loc = converter.genLocation(atomicCapture.source); |
2119 | Fortran::lower::genOmpAccAtomicCapture< |
2120 | Fortran::parser::OmpAtomicCapture, |
2121 | Fortran::parser::OmpAtomicClauseList>(converter, atomicCapture, |
2122 | loc); |
2123 | }, |
2124 | }, |
2125 | atomicConstruct.u); |
2126 | } |
2127 | |
2128 | static void |
2129 | genOMP(Fortran::lower::AbstractConverter &converter, |
2130 | Fortran::lower::SymMap &symTable, |
2131 | Fortran::semantics::SemanticsContext &semaCtx, |
2132 | Fortran::lower::pft::Evaluation &eval, |
2133 | const Fortran::parser::OpenMPBlockConstruct &blockConstruct) { |
2134 | const auto &beginBlockDirective = |
2135 | std::get<Fortran::parser::OmpBeginBlockDirective>(blockConstruct.t); |
2136 | const auto &endBlockDirective = |
2137 | std::get<Fortran::parser::OmpEndBlockDirective>(blockConstruct.t); |
2138 | mlir::Location currentLocation = |
2139 | converter.genLocation(beginBlockDirective.source); |
2140 | const auto origDirective = |
2141 | std::get<Fortran::parser::OmpBlockDirective>(beginBlockDirective.t).v; |
2142 | List<Clause> clauses = makeClauses( |
2143 | std::get<Fortran::parser::OmpClauseList>(beginBlockDirective.t), semaCtx); |
2144 | clauses.append(makeClauses( |
2145 | std::get<Fortran::parser::OmpClauseList>(endBlockDirective.t), semaCtx)); |
2146 | |
2147 | assert(llvm::omp::blockConstructSet.test(origDirective) && |
2148 | "Expected block construct" ); |
2149 | |
2150 | for (const Clause &clause : clauses) { |
2151 | mlir::Location clauseLocation = converter.genLocation(clause.source); |
2152 | if (!std::holds_alternative<clause::Allocate>(clause.u) && |
2153 | !std::holds_alternative<clause::Copyin>(clause.u) && |
2154 | !std::holds_alternative<clause::Copyprivate>(clause.u) && |
2155 | !std::holds_alternative<clause::Default>(clause.u) && |
2156 | !std::holds_alternative<clause::Depend>(clause.u) && |
2157 | !std::holds_alternative<clause::Final>(clause.u) && |
2158 | !std::holds_alternative<clause::Firstprivate>(clause.u) && |
2159 | !std::holds_alternative<clause::HasDeviceAddr>(clause.u) && |
2160 | !std::holds_alternative<clause::If>(clause.u) && |
2161 | !std::holds_alternative<clause::IsDevicePtr>(clause.u) && |
2162 | !std::holds_alternative<clause::Map>(clause.u) && |
2163 | !std::holds_alternative<clause::Nowait>(clause.u) && |
2164 | !std::holds_alternative<clause::NumTeams>(clause.u) && |
2165 | !std::holds_alternative<clause::NumThreads>(clause.u) && |
2166 | !std::holds_alternative<clause::Priority>(clause.u) && |
2167 | !std::holds_alternative<clause::Private>(clause.u) && |
2168 | !std::holds_alternative<clause::ProcBind>(clause.u) && |
2169 | !std::holds_alternative<clause::Reduction>(clause.u) && |
2170 | !std::holds_alternative<clause::Shared>(clause.u) && |
2171 | !std::holds_alternative<clause::Simd>(clause.u) && |
2172 | !std::holds_alternative<clause::ThreadLimit>(clause.u) && |
2173 | !std::holds_alternative<clause::Threads>(clause.u) && |
2174 | !std::holds_alternative<clause::UseDeviceAddr>(clause.u) && |
2175 | !std::holds_alternative<clause::UseDevicePtr>(clause.u)) { |
2176 | TODO(clauseLocation, "OpenMP Block construct clause" ); |
2177 | } |
2178 | } |
2179 | |
2180 | std::optional<llvm::omp::Directive> nextDir = origDirective; |
2181 | bool outermostLeafConstruct = true; |
2182 | while (nextDir) { |
2183 | llvm::omp::Directive leafDir; |
2184 | std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir); |
2185 | const bool genNested = !nextDir; |
2186 | const bool outerCombined = outermostLeafConstruct && nextDir.has_value(); |
2187 | switch (leafDir) { |
2188 | case llvm::omp::Directive::OMPD_master: |
2189 | // 2.16 MASTER construct. |
2190 | genMasterOp(converter, semaCtx, eval, genNested, currentLocation); |
2191 | break; |
2192 | case llvm::omp::Directive::OMPD_ordered: |
2193 | // 2.17.9 ORDERED construct. |
2194 | genOrderedRegionOp(converter, semaCtx, eval, genNested, currentLocation, |
2195 | clauses); |
2196 | break; |
2197 | case llvm::omp::Directive::OMPD_parallel: |
2198 | // 2.6 PARALLEL construct. |
2199 | genParallelOp(converter, symTable, semaCtx, eval, genNested, |
2200 | currentLocation, clauses, outerCombined); |
2201 | break; |
2202 | case llvm::omp::Directive::OMPD_single: |
2203 | // 2.8.2 SINGLE construct. |
2204 | genSingleOp(converter, semaCtx, eval, genNested, currentLocation, |
2205 | clauses); |
2206 | break; |
2207 | case llvm::omp::Directive::OMPD_target: |
2208 | // 2.12.5 TARGET construct. |
2209 | genTargetOp(converter, semaCtx, eval, genNested, currentLocation, clauses, |
2210 | outerCombined); |
2211 | break; |
2212 | case llvm::omp::Directive::OMPD_target_data: |
2213 | // 2.12.2 TARGET DATA construct. |
2214 | genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation, |
2215 | clauses); |
2216 | break; |
2217 | case llvm::omp::Directive::OMPD_task: |
2218 | // 2.10.1 TASK construct. |
2219 | genTaskOp(converter, semaCtx, eval, genNested, currentLocation, clauses); |
2220 | break; |
2221 | case llvm::omp::Directive::OMPD_taskgroup: |
2222 | // 2.17.6 TASKGROUP construct. |
2223 | genTaskgroupOp(converter, semaCtx, eval, genNested, currentLocation, |
2224 | clauses); |
2225 | break; |
2226 | case llvm::omp::Directive::OMPD_teams: |
2227 | // 2.7 TEAMS construct. |
2228 | // FIXME Pass the outerCombined argument or rename it to better describe |
2229 | // what it represents if it must always be `false` in this context. |
2230 | genTeamsOp(converter, semaCtx, eval, genNested, currentLocation, clauses); |
2231 | break; |
2232 | case llvm::omp::Directive::OMPD_workshare: |
2233 | // 2.8.3 WORKSHARE construct. |
2234 | // FIXME: Workshare is not a commonly used OpenMP construct, an |
2235 | // implementation for this feature will come later. For the codes |
2236 | // that use this construct, add a single construct for now. |
2237 | genSingleOp(converter, semaCtx, eval, genNested, currentLocation, |
2238 | clauses); |
2239 | break; |
2240 | default: |
2241 | llvm_unreachable("Unexpected block construct" ); |
2242 | break; |
2243 | } |
2244 | outermostLeafConstruct = false; |
2245 | } |
2246 | } |
2247 | |
2248 | static void |
2249 | genOMP(Fortran::lower::AbstractConverter &converter, |
2250 | Fortran::lower::SymMap &symTable, |
2251 | Fortran::semantics::SemanticsContext &semaCtx, |
2252 | Fortran::lower::pft::Evaluation &eval, |
2253 | const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { |
2254 | const auto &cd = |
2255 | std::get<Fortran::parser::OmpCriticalDirective>(criticalConstruct.t); |
2256 | List<Clause> clauses = |
2257 | makeClauses(std::get<Fortran::parser::OmpClauseList>(cd.t), semaCtx); |
2258 | const auto &name = std::get<std::optional<Fortran::parser::Name>>(cd.t); |
2259 | mlir::Location currentLocation = converter.getCurrentLocation(); |
2260 | genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, |
2261 | clauses, name); |
2262 | } |
2263 | |
2264 | static void |
2265 | genOMP(Fortran::lower::AbstractConverter &converter, |
2266 | Fortran::lower::SymMap &symTable, |
2267 | Fortran::semantics::SemanticsContext &semaCtx, |
2268 | Fortran::lower::pft::Evaluation &eval, |
2269 | const Fortran::parser::OpenMPExecutableAllocate &execAllocConstruct) { |
2270 | TODO(converter.getCurrentLocation(), "OpenMPExecutableAllocate" ); |
2271 | } |
2272 | |
2273 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
2274 | Fortran::lower::SymMap &symTable, |
2275 | Fortran::semantics::SemanticsContext &semaCtx, |
2276 | Fortran::lower::pft::Evaluation &eval, |
2277 | const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { |
2278 | const auto &beginLoopDirective = |
2279 | std::get<Fortran::parser::OmpBeginLoopDirective>(loopConstruct.t); |
2280 | List<Clause> clauses = makeClauses( |
2281 | std::get<Fortran::parser::OmpClauseList>(beginLoopDirective.t), semaCtx); |
2282 | mlir::Location currentLocation = |
2283 | converter.genLocation(beginLoopDirective.source); |
2284 | const auto origDirective = |
2285 | std::get<Fortran::parser::OmpLoopDirective>(beginLoopDirective.t).v; |
2286 | |
2287 | assert(llvm::omp::loopConstructSet.test(origDirective) && |
2288 | "Expected loop construct" ); |
2289 | |
2290 | if (auto &endLoopDirective = |
2291 | std::get<std::optional<Fortran::parser::OmpEndLoopDirective>>( |
2292 | loopConstruct.t)) { |
2293 | clauses.append(makeClauses( |
2294 | std::get<Fortran::parser::OmpClauseList>(endLoopDirective->t), |
2295 | semaCtx)); |
2296 | } |
2297 | |
2298 | std::optional<llvm::omp::Directive> nextDir = origDirective; |
2299 | while (nextDir) { |
2300 | llvm::omp::Directive leafDir; |
2301 | std::tie(leafDir, nextDir) = splitCombinedDirective(*nextDir); |
2302 | if (llvm::omp::compositeConstructSet.test(leafDir)) { |
2303 | assert(!nextDir && "Composite construct cannot be split" ); |
2304 | switch (leafDir) { |
2305 | case llvm::omp::Directive::OMPD_distribute_parallel_do: |
2306 | // 2.9.4.3 DISTRIBUTE PARALLEL Worksharing-Loop construct. |
2307 | genCompositeDistributeParallelDo(converter, semaCtx, eval, clauses, |
2308 | currentLocation); |
2309 | break; |
2310 | case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: |
2311 | // 2.9.4.4 DISTRIBUTE PARALLEL Worksharing-Loop SIMD construct. |
2312 | genCompositeDistributeParallelDoSimd(converter, semaCtx, eval, clauses, |
2313 | currentLocation); |
2314 | break; |
2315 | case llvm::omp::Directive::OMPD_distribute_simd: |
2316 | // 2.9.4.2 DISTRIBUTE SIMD construct. |
2317 | genCompositeDistributeSimd(converter, semaCtx, eval, clauses, |
2318 | currentLocation); |
2319 | break; |
2320 | case llvm::omp::Directive::OMPD_do_simd: |
2321 | // 2.9.3.2 Worksharing-Loop SIMD construct. |
2322 | genCompositeDoSimd(converter, semaCtx, eval, clauses, currentLocation); |
2323 | break; |
2324 | case llvm::omp::Directive::OMPD_taskloop_simd: |
2325 | // 2.10.3 TASKLOOP SIMD construct. |
2326 | genCompositeTaskloopSimd(converter, semaCtx, eval, clauses, |
2327 | currentLocation); |
2328 | break; |
2329 | default: |
2330 | llvm_unreachable("Unexpected composite construct" ); |
2331 | } |
2332 | } else { |
2333 | const bool genNested = !nextDir; |
2334 | switch (leafDir) { |
2335 | case llvm::omp::Directive::OMPD_distribute: |
2336 | // 2.9.4.1 DISTRIBUTE construct. |
2337 | genDistributeOp(converter, semaCtx, eval, genNested, currentLocation, |
2338 | clauses); |
2339 | break; |
2340 | case llvm::omp::Directive::OMPD_do: |
2341 | // 2.9.2 Worksharing-Loop construct. |
2342 | genWsloopOp(converter, semaCtx, eval, currentLocation, clauses); |
2343 | break; |
2344 | case llvm::omp::Directive::OMPD_parallel: |
2345 | // 2.6 PARALLEL construct. |
2346 | // FIXME This is not necessarily always the outer leaf construct of a |
2347 | // combined construct in this constext (e.g. distribute parallel do). |
2348 | // Maybe rename the argument if it represents something else or |
2349 | // initialize it properly. |
2350 | genParallelOp(converter, symTable, semaCtx, eval, genNested, |
2351 | currentLocation, clauses, |
2352 | /*outerCombined=*/true); |
2353 | break; |
2354 | case llvm::omp::Directive::OMPD_simd: |
2355 | // 2.9.3.1 SIMD construct. |
2356 | genSimdOp(converter, semaCtx, eval, currentLocation, clauses); |
2357 | break; |
2358 | case llvm::omp::Directive::OMPD_target: |
2359 | // 2.12.5 TARGET construct. |
2360 | genTargetOp(converter, semaCtx, eval, genNested, currentLocation, |
2361 | clauses, /*outerCombined=*/true); |
2362 | break; |
2363 | case llvm::omp::Directive::OMPD_taskloop: |
2364 | // 2.10.2 TASKLOOP construct. |
2365 | genTaskloopOp(converter, semaCtx, eval, currentLocation, clauses); |
2366 | break; |
2367 | case llvm::omp::Directive::OMPD_teams: |
2368 | // 2.7 TEAMS construct. |
2369 | // FIXME This is not necessarily always the outer leaf construct of a |
2370 | // combined construct in this constext (e.g. target teams distribute). |
2371 | // Maybe rename the argument if it represents something else or |
2372 | // initialize it properly. |
2373 | genTeamsOp(converter, semaCtx, eval, genNested, currentLocation, |
2374 | clauses, /*outerCombined=*/true); |
2375 | break; |
2376 | case llvm::omp::Directive::OMPD_loop: |
2377 | case llvm::omp::Directive::OMPD_masked: |
2378 | case llvm::omp::Directive::OMPD_master: |
2379 | case llvm::omp::Directive::OMPD_tile: |
2380 | case llvm::omp::Directive::OMPD_unroll: |
2381 | TODO(currentLocation, "Unhandled loop directive (" + |
2382 | llvm::omp::getOpenMPDirectiveName(leafDir) + |
2383 | ")" ); |
2384 | break; |
2385 | default: |
2386 | llvm_unreachable("Unexpected loop construct" ); |
2387 | } |
2388 | } |
2389 | } |
2390 | } |
2391 | |
2392 | static void |
2393 | genOMP(Fortran::lower::AbstractConverter &converter, |
2394 | Fortran::lower::SymMap &symTable, |
2395 | Fortran::semantics::SemanticsContext &semaCtx, |
2396 | Fortran::lower::pft::Evaluation &eval, |
2397 | const Fortran::parser::OpenMPSectionConstruct §ionConstruct) { |
2398 | // SECTION constructs are handled as a part of SECTIONS. |
2399 | llvm_unreachable("Unexpected standalone OMP SECTION" ); |
2400 | } |
2401 | |
2402 | static void |
2403 | genOMP(Fortran::lower::AbstractConverter &converter, |
2404 | Fortran::lower::SymMap &symTable, |
2405 | Fortran::semantics::SemanticsContext &semaCtx, |
2406 | Fortran::lower::pft::Evaluation &eval, |
2407 | const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { |
2408 | const auto &beginSectionsDirective = |
2409 | std::get<Fortran::parser::OmpBeginSectionsDirective>(sectionsConstruct.t); |
2410 | List<Clause> clauses = makeClauses( |
2411 | std::get<Fortran::parser::OmpClauseList>(beginSectionsDirective.t), |
2412 | semaCtx); |
2413 | const auto &endSectionsDirective = |
2414 | std::get<Fortran::parser::OmpEndSectionsDirective>(sectionsConstruct.t); |
2415 | clauses.append(makeClauses( |
2416 | std::get<Fortran::parser::OmpClauseList>(endSectionsDirective.t), |
2417 | semaCtx)); |
2418 | |
2419 | // Process clauses before optional omp.parallel, so that new variables are |
2420 | // allocated outside of the parallel region |
2421 | mlir::Location currentLocation = converter.getCurrentLocation(); |
2422 | mlir::omp::SectionsClauseOps clauseOps; |
2423 | genSectionsClauses(converter, semaCtx, clauses, currentLocation, clauseOps); |
2424 | |
2425 | // Parallel wrapper of PARALLEL SECTIONS construct |
2426 | llvm::omp::Directive dir = |
2427 | std::get<Fortran::parser::OmpSectionsDirective>(beginSectionsDirective.t) |
2428 | .v; |
2429 | if (dir == llvm::omp::Directive::OMPD_parallel_sections) { |
2430 | genParallelOp(converter, symTable, semaCtx, eval, |
2431 | /*genNested=*/false, currentLocation, clauses, |
2432 | /*outerCombined=*/true); |
2433 | } |
2434 | |
2435 | // SECTIONS construct. |
2436 | genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); |
2437 | |
2438 | // Generate nested SECTION operations recursively. |
2439 | const auto §ionBlocks = |
2440 | std::get<Fortran::parser::OmpSectionBlocks>(sectionsConstruct.t); |
2441 | auto &firOpBuilder = converter.getFirOpBuilder(); |
2442 | auto ip = firOpBuilder.saveInsertionPoint(); |
2443 | for (const auto &[nblock, neval] : |
2444 | llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { |
2445 | symTable.pushScope(); |
2446 | genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, |
2447 | clauses); |
2448 | symTable.popScope(); |
2449 | firOpBuilder.restoreInsertionPoint(ip); |
2450 | } |
2451 | } |
2452 | |
2453 | static void genOMP(Fortran::lower::AbstractConverter &converter, |
2454 | Fortran::lower::SymMap &symTable, |
2455 | Fortran::semantics::SemanticsContext &semaCtx, |
2456 | Fortran::lower::pft::Evaluation &eval, |
2457 | const Fortran::parser::OpenMPConstruct &ompConstruct) { |
2458 | std::visit( |
2459 | [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, |
2460 | ompConstruct.u); |
2461 | } |
2462 | |
2463 | //===----------------------------------------------------------------------===// |
2464 | // Public functions |
2465 | //===----------------------------------------------------------------------===// |
2466 | |
2467 | mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, |
2468 | mlir::Operation *op, |
2469 | mlir::Location loc) { |
2470 | if (mlir::isa<mlir::omp::AtomicUpdateOp, mlir::omp::DeclareReductionOp, |
2471 | mlir::omp::LoopNestOp>(op)) |
2472 | return builder.create<mlir::omp::YieldOp>(loc); |
2473 | return builder.create<mlir::omp::TerminatorOp>(loc); |
2474 | } |
2475 | |
2476 | void Fortran::lower::genOpenMPConstruct( |
2477 | Fortran::lower::AbstractConverter &converter, |
2478 | Fortran::lower::SymMap &symTable, |
2479 | Fortran::semantics::SemanticsContext &semaCtx, |
2480 | Fortran::lower::pft::Evaluation &eval, |
2481 | const Fortran::parser::OpenMPConstruct &omp) { |
2482 | symTable.pushScope(); |
2483 | genOMP(converter, symTable, semaCtx, eval, omp); |
2484 | symTable.popScope(); |
2485 | } |
2486 | |
2487 | void Fortran::lower::genOpenMPDeclarativeConstruct( |
2488 | Fortran::lower::AbstractConverter &converter, |
2489 | Fortran::lower::SymMap &symTable, |
2490 | Fortran::semantics::SemanticsContext &semaCtx, |
2491 | Fortran::lower::pft::Evaluation &eval, |
2492 | const Fortran::parser::OpenMPDeclarativeConstruct &omp) { |
2493 | genOMP(converter, symTable, semaCtx, eval, omp); |
2494 | genNestedEvaluations(converter, eval); |
2495 | } |
2496 | |
2497 | void Fortran::lower::genOpenMPSymbolProperties( |
2498 | Fortran::lower::AbstractConverter &converter, |
2499 | const Fortran::lower::pft::Variable &var) { |
2500 | assert(var.hasSymbol() && "Expecting Symbol" ); |
2501 | const Fortran::semantics::Symbol &sym = var.getSymbol(); |
2502 | |
2503 | if (sym.test(Fortran::semantics::Symbol::Flag::OmpThreadprivate)) |
2504 | Fortran::lower::genThreadprivateOp(converter, var); |
2505 | |
2506 | if (sym.test(Fortran::semantics::Symbol::Flag::OmpDeclareTarget)) |
2507 | Fortran::lower::genDeclareTargetIntGlobal(converter, var); |
2508 | } |
2509 | |
2510 | int64_t Fortran::lower::getCollapseValue( |
2511 | const Fortran::parser::OmpClauseList &clauseList) { |
2512 | for (const Fortran::parser::OmpClause &clause : clauseList.v) { |
2513 | if (const auto &collapseClause = |
2514 | std::get_if<Fortran::parser::OmpClause::Collapse>(&clause.u)) { |
2515 | const auto *expr = Fortran::semantics::GetExpr(collapseClause->v); |
2516 | return Fortran::evaluate::ToInt64(*expr).value(); |
2517 | } |
2518 | } |
2519 | return 1; |
2520 | } |
2521 | |
2522 | void Fortran::lower::genThreadprivateOp( |
2523 | Fortran::lower::AbstractConverter &converter, |
2524 | const Fortran::lower::pft::Variable &var) { |
2525 | fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); |
2526 | mlir::Location currentLocation = converter.getCurrentLocation(); |
2527 | |
2528 | const Fortran::semantics::Symbol &sym = var.getSymbol(); |
2529 | mlir::Value symThreadprivateValue; |
2530 | if (const Fortran::semantics::Symbol *common = |
2531 | Fortran::semantics::FindCommonBlockContaining(sym.GetUltimate())) { |
2532 | mlir::Value commonValue = converter.getSymbolAddress(*common); |
2533 | if (mlir::isa<mlir::omp::ThreadprivateOp>(commonValue.getDefiningOp())) { |
2534 | // Generate ThreadprivateOp for a common block instead of its members and |
2535 | // only do it once for a common block. |
2536 | return; |
2537 | } |
2538 | // Generate ThreadprivateOp and rebind the common block. |
2539 | mlir::Value commonThreadprivateValue = |
2540 | firOpBuilder.create<mlir::omp::ThreadprivateOp>( |
2541 | currentLocation, commonValue.getType(), commonValue); |
2542 | converter.bindSymbol(*common, commonThreadprivateValue); |
2543 | // Generate the threadprivate value for the common block member. |
2544 | symThreadprivateValue = genCommonBlockMember(converter, currentLocation, |
2545 | sym, commonThreadprivateValue); |
2546 | } else if (!var.isGlobal()) { |
2547 | // Non-global variable which can be in threadprivate directive must be one |
2548 | // variable in main program, and it has implicit SAVE attribute. Take it as |
2549 | // with SAVE attribute, so to create GlobalOp for it to simplify the |
2550 | // translation to LLVM IR. |
2551 | // Avoids performing multiple globalInitializations. |
2552 | fir::GlobalOp global; |
2553 | auto module = converter.getModuleOp(); |
2554 | std::string globalName = converter.mangleName(sym); |
2555 | if (module.lookupSymbol<fir::GlobalOp>(globalName)) |
2556 | global = module.lookupSymbol<fir::GlobalOp>(globalName); |
2557 | else |
2558 | global = globalInitialization(converter, firOpBuilder, sym, var, |
2559 | currentLocation); |
2560 | |
2561 | mlir::Value symValue = firOpBuilder.create<fir::AddrOfOp>( |
2562 | currentLocation, global.resultType(), global.getSymbol()); |
2563 | symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>( |
2564 | currentLocation, symValue.getType(), symValue); |
2565 | } else { |
2566 | mlir::Value symValue = converter.getSymbolAddress(sym); |
2567 | |
2568 | // The symbol may be use-associated multiple times, and nothing needs to be |
2569 | // done after the original symbol is mapped to the threadprivatized value |
2570 | // for the first time. Use the threadprivatized value directly. |
2571 | mlir::Operation *op; |
2572 | if (auto declOp = symValue.getDefiningOp<hlfir::DeclareOp>()) |
2573 | op = declOp.getMemref().getDefiningOp(); |
2574 | else |
2575 | op = symValue.getDefiningOp(); |
2576 | if (mlir::isa<mlir::omp::ThreadprivateOp>(op)) |
2577 | return; |
2578 | |
2579 | symThreadprivateValue = firOpBuilder.create<mlir::omp::ThreadprivateOp>( |
2580 | currentLocation, symValue.getType(), symValue); |
2581 | } |
2582 | |
2583 | fir::ExtendedValue sexv = converter.getSymbolExtendedValue(sym); |
2584 | fir::ExtendedValue symThreadprivateExv = |
2585 | getExtendedValue(sexv, symThreadprivateValue); |
2586 | converter.bindSymbol(sym, symThreadprivateExv); |
2587 | } |
2588 | |
2589 | // This function replicates threadprivate's behaviour of generating |
2590 | // an internal fir.GlobalOp for non-global variables in the main program |
2591 | // that have the implicit SAVE attribute, to simplifiy LLVM-IR and MLIR |
2592 | // generation. |
2593 | void Fortran::lower::genDeclareTargetIntGlobal( |
2594 | Fortran::lower::AbstractConverter &converter, |
2595 | const Fortran::lower::pft::Variable &var) { |
2596 | if (!var.isGlobal()) { |
2597 | // A non-global variable which can be in a declare target directive must |
2598 | // be a variable in the main program, and it has the implicit SAVE |
2599 | // attribute. We create a GlobalOp for it to simplify the translation to |
2600 | // LLVM IR. |
2601 | globalInitialization(converter, converter.getFirOpBuilder(), |
2602 | var.getSymbol(), var, converter.getCurrentLocation()); |
2603 | } |
2604 | } |
2605 | |
2606 | bool Fortran::lower::isOpenMPTargetConstruct( |
2607 | const Fortran::parser::OpenMPConstruct &omp) { |
2608 | llvm::omp::Directive dir = llvm::omp::Directive::OMPD_unknown; |
2609 | if (const auto *block = |
2610 | std::get_if<Fortran::parser::OpenMPBlockConstruct>(&omp.u)) { |
2611 | const auto &begin = |
2612 | std::get<Fortran::parser::OmpBeginBlockDirective>(block->t); |
2613 | dir = std::get<Fortran::parser::OmpBlockDirective>(begin.t).v; |
2614 | } else if (const auto *loop = |
2615 | std::get_if<Fortran::parser::OpenMPLoopConstruct>(&omp.u)) { |
2616 | const auto &begin = |
2617 | std::get<Fortran::parser::OmpBeginLoopDirective>(loop->t); |
2618 | dir = std::get<Fortran::parser::OmpLoopDirective>(begin.t).v; |
2619 | } |
2620 | return llvm::omp::allTargetSet.test(dir); |
2621 | } |
2622 | |
2623 | void Fortran::lower::gatherOpenMPDeferredDeclareTargets( |
2624 | Fortran::lower::AbstractConverter &converter, |
2625 | Fortran::semantics::SemanticsContext &semaCtx, |
2626 | Fortran::lower::pft::Evaluation &eval, |
2627 | const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl, |
2628 | llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> |
2629 | &deferredDeclareTarget) { |
2630 | std::visit( |
2631 | Fortran::common::visitors{ |
2632 | [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { |
2633 | collectDeferredDeclareTargets(converter, semaCtx, eval, ompReq, |
2634 | deferredDeclareTarget); |
2635 | }, |
2636 | [&](const auto &) {}, |
2637 | }, |
2638 | ompDecl.u); |
2639 | } |
2640 | |
2641 | bool Fortran::lower::isOpenMPDeviceDeclareTarget( |
2642 | Fortran::lower::AbstractConverter &converter, |
2643 | Fortran::semantics::SemanticsContext &semaCtx, |
2644 | Fortran::lower::pft::Evaluation &eval, |
2645 | const Fortran::parser::OpenMPDeclarativeConstruct &ompDecl) { |
2646 | return std::visit( |
2647 | Fortran::common::visitors{ |
2648 | [&](const Fortran::parser::OpenMPDeclareTargetConstruct &ompReq) { |
2649 | mlir::omp::DeclareTargetDeviceType targetType = |
2650 | getDeclareTargetFunctionDevice(converter, semaCtx, eval, ompReq) |
2651 | .value_or(mlir::omp::DeclareTargetDeviceType::host); |
2652 | return targetType != mlir::omp::DeclareTargetDeviceType::host; |
2653 | }, |
2654 | [&](const auto &) { return false; }, |
2655 | }, |
2656 | ompDecl.u); |
2657 | } |
2658 | |
2659 | // In certain cases such as subroutine or function interfaces which declare |
2660 | // but do not define or directly call the subroutine or function in the same |
2661 | // module, their lowering is delayed until after the declare target construct |
2662 | // itself is processed, so there symbol is not within the table. |
2663 | // |
2664 | // This function will also return true if we encounter any device declare |
2665 | // target cases, to satisfy checking if we require the requires attributes |
2666 | // on the module. |
2667 | bool Fortran::lower::markOpenMPDeferredDeclareTargetFunctions( |
2668 | mlir::Operation *mod, |
2669 | llvm::SmallVectorImpl<OMPDeferredDeclareTargetInfo> &deferredDeclareTargets, |
2670 | AbstractConverter &converter) { |
2671 | bool deviceCodeFound = false; |
2672 | auto modOp = llvm::cast<mlir::ModuleOp>(mod); |
2673 | for (auto declTar : deferredDeclareTargets) { |
2674 | mlir::Operation *op = modOp.lookupSymbol(converter.mangleName(declTar.sym)); |
2675 | |
2676 | // Due to interfaces being optionally emitted on usage in a module, |
2677 | // not finding an operation at this point cannot be a hard error, we |
2678 | // simply ignore it for now. |
2679 | // TODO: Add semantic checks for detecting cases where an erronous |
2680 | // (undefined) symbol has been supplied to a declare target clause |
2681 | if (!op) |
2682 | continue; |
2683 | |
2684 | auto devType = declTar.declareTargetDeviceType; |
2685 | if (!deviceCodeFound && devType != mlir::omp::DeclareTargetDeviceType::host) |
2686 | deviceCodeFound = true; |
2687 | |
2688 | markDeclareTarget(op, converter, declTar.declareTargetCaptureClause, |
2689 | devType); |
2690 | } |
2691 | |
2692 | return deviceCodeFound; |
2693 | } |
2694 | |
2695 | void Fortran::lower::genOpenMPRequires( |
2696 | mlir::Operation *mod, const Fortran::semantics::Symbol *symbol) { |
2697 | using MlirRequires = mlir::omp::ClauseRequires; |
2698 | using SemaRequires = Fortran::semantics::WithOmpDeclarative::RequiresFlag; |
2699 | |
2700 | if (auto offloadMod = |
2701 | llvm::dyn_cast<mlir::omp::OffloadModuleInterface>(mod)) { |
2702 | Fortran::semantics::WithOmpDeclarative::RequiresFlags semaFlags; |
2703 | if (symbol) { |
2704 | Fortran::common::visit( |
2705 | [&](const auto &details) { |
2706 | if constexpr (std::is_base_of_v< |
2707 | Fortran::semantics::WithOmpDeclarative, |
2708 | std::decay_t<decltype(details)>>) { |
2709 | if (details.has_ompRequires()) |
2710 | semaFlags = *details.ompRequires(); |
2711 | } |
2712 | }, |
2713 | symbol->details()); |
2714 | } |
2715 | |
2716 | MlirRequires mlirFlags = MlirRequires::none; |
2717 | if (semaFlags.test(SemaRequires::ReverseOffload)) |
2718 | mlirFlags = mlirFlags | MlirRequires::reverse_offload; |
2719 | if (semaFlags.test(SemaRequires::UnifiedAddress)) |
2720 | mlirFlags = mlirFlags | MlirRequires::unified_address; |
2721 | if (semaFlags.test(SemaRequires::UnifiedSharedMemory)) |
2722 | mlirFlags = mlirFlags | MlirRequires::unified_shared_memory; |
2723 | if (semaFlags.test(SemaRequires::DynamicAllocators)) |
2724 | mlirFlags = mlirFlags | MlirRequires::dynamic_allocators; |
2725 | |
2726 | offloadMod.setRequires(mlirFlags); |
2727 | } |
2728 | } |
2729 | |