//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
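    // For untied tasks the runtime may re-invoke the task entry function
    // several times; the part id stored through PartIDVar selects which
    // portion of the body to resume. Enter() below emits, roughly:
    //   switch (*partid) {          // dispatch on the saved part id
    //   case 0: goto .untied.jmp.0; // first task part
    //   ...
    //   default: goto .untied.done.; // branch through cleanups to return
    //   }
    // and each emitUntiedSwitch() call stores the next part id and adds a
    // new case that resumes after the task scheduling point.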
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original
    // one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
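
// A typical use of this RAII is (a sketch; the directive kind and arguments
// depend on the construct being emitted):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CodeGen(CGF); // Emit the construct body with the inlined region info.
//   } // The destructor restores the previous CapturedStmtInfo.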

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerator values are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
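
// These are bitmask values, so they compose by OR-ing. For example
// (illustrative only), the ident_t flags for the implicit barrier at the end
// of a worksharing 'for' loop would end up as
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR
// where OMP_IDENT_KMPC marks the C-style ident layout and the barrier flag
// records why the barrier was emitted.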

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
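
// For example (file and function names here are illustrative), the psource
// string built by getIdentStringFromSourceLocation() later in this file has
// the form
//   ";file.c;foo;10;3;;"
// i.e. ";<file>;<function>;<line>;<column>;;".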

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
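
// The modifier bits are OR'ed into the base schedule value. For example
// (illustrative only), 'schedule(nonmonotonic: dynamic)' on an 'omp for'
// would be encoded as
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.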

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}

/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
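
// The loop emitted above has, roughly, the following shape:
//   entry:               isempty = (dest.begin == dest.end)
//                        br isempty, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.body:  phi for dest (and src, for UDRs); initialize one
//                        element; advance pointers;
//                        done = (dest.next == dest.end)
//                        br done, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.done:  (continue)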

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

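// castToBase() makes the adjusted private pointer accessible through the same
// chain of pointer/reference indirections as the original base expression:
// for each level of indirection in BaseTy it creates a fresh temporary,
// chains it to the previous one, and finally stores the adjusted pointer into
// the innermost temporary, returning the outermost one.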
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

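// For example (a sketch; the type MyTy and the identifier "merge" are
// hypothetical), a user-defined reduction such as
//   #pragma omp declare reduction(merge : MyTy : omp_out.add(omp_in)) \
//       initializer(omp_priv = MyTy())
// is lowered via emitCombinerOrInitializer() below into internal helper
// functions whose two pointer parameters stand in for the omp_in/omp_out
// (or omp_priv/omp_orig) variables referenced in the combiner and
// initializer expressions.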
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

1353static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
1354 SourceLocation Loc,
1355 SmallString<128> &Buffer) {
1356 llvm::raw_svector_ostream OS(Buffer);
1357 // Build debug location
1358 PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
1359 OS << ";" << PLoc.getFilename() << ";";
1360 if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
1361 OS << FD->getQualifiedNameAsString();
1362 OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
1363 return OS.str();
1364}

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls
  // as the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use the parameter to avoid a possible
  // crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with the thread id passed as an
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If the value was loaded in the entry block, cache it and use it
        // everywhere in the function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call kmp_int32
  // __kmpc_global_thread_num(ident_t *loc).
  // Generate the thread id value and cache it for use across the function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
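
// The net effect of the caching above is that a function needs at most one
// runtime query for its global thread id. A rough sketch of the resulting IR
// (names are illustrative, not necessarily what the backend prints):
//
//   entry:
//     ...allocas...
//     %gtid = call i32 @__kmpc_global_thread_num(ptr @.loc) ; emitted once at
//                                                           ; the service
//                                                           ; insert point
//   some.block:
//     ; later uses reuse %gtid instead of re-calling the runtime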

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
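
// For a non-TLS target, each access to a threadprivate variable therefore
// becomes a runtime lookup. Roughly (illustrative only), for
//   int x;
//   #pragma omp threadprivate(x)
// a use of 'x' loads through:
//   __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &x.cache);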

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits the destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable. The runtime reserves
    // this parameter and currently asserts that it is always NULL, so pass a
    // null value.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
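
// Putting the pieces together, for a C++ threadprivate variable with a
// non-trivial constructor and destructor, e.g.
//   S s;
//   #pragma omp threadprivate(s)
// the emitted init function is conceptually (illustrative sketch):
//   void __omp_threadprivate_init_() {
//     __kmpc_global_thread_num(&loc); // ensure the runtime is initialized
//     __kmpc_threadprivate_register(&loc, &s, __kmpc_global_ctor_,
//                                   /*cctor=*/NULL, __kmpc_global_dtor_);
//   }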

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
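
// For a non-foldable condition, the shape of the emitted control flow is
// simply (illustrative):
//
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then: <ThenGen>; br label %omp_if.end
//   omp_if.else: <ElseGen>; br label %omp_if.end
//   omp_if.end:  ...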

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, that every data environment starts with a
    // new function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
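
// End-to-end, for
//   #pragma omp parallel if(c)
//   { body; }
// the host code is conceptually (illustrative sketch; 'omp_outlined' stands
// for the outlined body function):
//   if (c) {
//     __kmpc_fork_call(&loc, n, omp_outlined, captured...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     omp_outlined(&gtid, &zero, captured...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }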

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're in a regular serial code
// region, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash that thread ID in a
// temporary, and return the address of the temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
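
// For example (illustrative),
//   #pragma omp critical(lck) hint(omp_sync_hint_contended)
//   { body; }
// lowers to roughly:
//   __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_lck.var, hint);
//   body;
//   __kmpc_end_critical(&loc, gtid, &.gomp_critical_user_lck.var);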

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies the private values from the single region
    // to all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
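
// For example (illustrative),
//   #pragma omp single copyprivate(a)
//   a = compute();
// expands to roughly:
//   did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     a = compute();
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   void *list[1] = {&a};
//   __kmpc_copyprivate(&loc, gtid, sizeof(list), list, copy_func, did_it);
// where copy_func performs 'dst->a = src->a' for each copyprivate variable.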

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // that case, choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build a call to __kmpc_cancel_barrier(loc, thread_id) or
  // __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
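
// In a cancellable region, e.g. (illustrative)
//   #pragma omp for
//   for (...) { ... #pragma omp cancel for ... }
// the implicit barrier becomes a cancellation point:
//   if (__kmpc_cancel_barrier(&loc, gtid) != 0)
//     goto cancel.exit; // branch through cleanups to the cancel destination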

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked
               ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
               : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
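
// For instance (illustrative), schedule(dynamic, 4) on a worksharing loop
// without an ordered clause maps to OMP_sch_dynamic_chunked, while
// schedule(static) with ordered maps to OMP_ord_static; the resulting value
// is what is passed as the 'schedtype' argument of the init calls below.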

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only the static schedule kind is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
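
// The modifier bits are simply OR'ed into the schedule value. For example
// (illustrative), under OpenMP 5.0 a plain schedule(dynamic) becomes
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// because dynamic is not one of the static/ordered kinds excluded above.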

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
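
// For a 32-bit signed IV and (illustrative) schedule(dynamic, 8) over
// [0, N-1], the emitted call is roughly:
//   __kmpc_dispatch_init_4(&loc, gtid,
//                          kmp_sch_dynamic_chunked | nonmonotonic modifier,
//                          /*lower=*/0, /*upper=*/N - 1, /*stride=*/1,
//                          /*chunk=*/8);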
2555
2556static void emitForStaticInitCall(
2557 CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
2558 llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
2559 OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
2560 const CGOpenMPRuntime::StaticRTInput &Values) {
2561 if (!CGF.HaveInsertPoint())
2562 return;
2563
2564 assert(!Values.Ordered);
2565 assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
2566 Schedule == OMP_sch_static_balanced_chunked ||
2567 Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
2568 Schedule == OMP_dist_sch_static ||
2569 Schedule == OMP_dist_sch_static_chunked);
2570
2571 // Call __kmpc_for_static_init(
2572 // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
2573 // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
2574 // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
2575 // kmp_int[32|64] incr, kmp_int[32|64] chunk);
2576 llvm::Value *Chunk = Values.Chunk;
2577 if (Chunk == nullptr) {
2578 assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
2579 Schedule == OMP_dist_sch_static) &&
2580 "expected static non-chunked schedule");
2581 // If the Chunk was not specified in the clause - use default value 1.
2582 Chunk = CGF.Builder.getIntN(N: Values.IVSize, C: 1);
2583 } else {
2584 assert((Schedule == OMP_sch_static_chunked ||
2585 Schedule == OMP_sch_static_balanced_chunked ||
2586 Schedule == OMP_ord_static_chunked ||
2587 Schedule == OMP_dist_sch_static_chunked) &&
2588 "expected static chunked schedule");
2589 }
2590 llvm::Value *Args[] = {
2591 UpdateLocation,
2592 ThreadId,
2593 CGF.Builder.getInt32(C: addMonoNonMonoModifier(CGM&: CGF.CGM, Schedule, M1,
2594 M2)), // Schedule type
2595 Values.IL.emitRawPointer(CGF), // &isLastIter
2596 Values.LB.emitRawPointer(CGF), // &LB
2597 Values.UB.emitRawPointer(CGF), // &UB
2598 Values.ST.emitRawPointer(CGF), // &Stride
2599 CGF.Builder.getIntN(N: Values.IVSize, C: 1), // Incr
2600 Chunk // Chunk
2601 };
2602 CGF.EmitRuntimeCall(callee: ForStaticInitFunction, args: Args);
2603}
2604
2605void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
2606 SourceLocation Loc,
2607 OpenMPDirectiveKind DKind,
2608 const OpenMPScheduleTy &ScheduleKind,
2609 const StaticRTInput &Values) {
2610 OpenMPSchedType ScheduleNum = getRuntimeSchedule(
2611 ScheduleKind: ScheduleKind.Schedule, Chunked: Values.Chunk != nullptr, Ordered: Values.Ordered);
2612 assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
2613 "Expected loop-based or sections-based directive.");
2614 llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
2615 isOpenMPLoopDirective(DKind)
2616 ? OMP_IDENT_WORK_LOOP
2617 : OMP_IDENT_WORK_SECTIONS);
2618 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2619 llvm::FunctionCallee StaticInitFunction =
2620 OMPBuilder.createForStaticInitFunction(IVSize: Values.IVSize, IVSigned: Values.IVSigned,
2621 IsGPUDistribute: false);
2622 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2623 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2624 Schedule: ScheduleNum, M1: ScheduleKind.M1, M2: ScheduleKind.M2, Values);
2625}
2626
2627void CGOpenMPRuntime::emitDistributeStaticInit(
2628 CodeGenFunction &CGF, SourceLocation Loc,
2629 OpenMPDistScheduleClauseKind SchedKind,
2630 const CGOpenMPRuntime::StaticRTInput &Values) {
2631 OpenMPSchedType ScheduleNum =
2632 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2633 llvm::Value *UpdatedLocation =
2634 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2635 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2636 llvm::FunctionCallee StaticInitFunction;
2637 bool isGPUDistribute =
2638 CGM.getLangOpts().OpenMPIsTargetDevice &&
2639 (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
2640 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2641 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2642
2643 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2644 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2645 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2646}
2647
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

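/// Notify the runtime that the current iteration of an ordered, dynamically
/// scheduled loop is finished, so threads waiting on the ordered region may
/// proceed.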
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

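/// Emit a call to __kmpc_dispatch_next_(4|8)[u] and convert its kmp_int32
/// result into a boolean "another chunk is available" flag. The typical use
/// in the emitted code is a dispatch loop of the form (a sketch):
/// \code
/// while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
///   // execute iterations [lb, ub]
/// }
/// \endcode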
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

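/// Emit __kmpc_push_num_threads so the next parallel region created by this
/// thread honors the 'num_threads' clause; the requested value is cast to
/// kmp_int32 first.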
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

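/// Emit an OpenMP flush: via the OpenMPIRBuilder when it is enabled,
/// otherwise as a direct __kmpc_flush call. Note that the flushed variable
/// list and the atomic ordering are currently not forwarded to the runtime.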
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

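/// Emit the offload entries and metadata for all registered target regions
/// and declare target variables, translating entry info back to source
/// locations so that malformed entries can be diagnosed precisely.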
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

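/// Lazily build the kmp_routine_entry_t function pointer type,
/// kmp_int32 (*)(kmp_int32, void *), used for task entry points and
/// destructor thunks.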
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

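/// Check whether the variable is allocated via an 'omp allocate' declaration
/// with a non-default allocator and therefore needs runtime-managed,
/// indirect storage.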
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

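/// Build the implicit '.kmp_privates.t' record with one field per private
/// variable, propagating 'aligned' attributes so that field layout matches
/// the variables' alignment requirements.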
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

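/// Build the kmp_task_t record (and the kmp_cmplrdata_t union holding the
/// destructor thunk or priority) matching the runtime's task descriptor
/// layout; taskloop directives get additional bound/stride/liter/reduction
/// fields.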
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

2953
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

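/// Emit a destructor thunk with the kmp_routine_entry_t signature that runs
/// the destructors of all nontrivially destructible private copies, roughly
/// (a sketch):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid, kmp_task_t *tt) {
///   // destroy the fields of tt->privates in declaration order
/// }
/// \endcode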
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if a task duplication function is required for taskloops, i.e.
/// whether any private copy has a nontrivial (constructor) initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

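/// Compute the base pointer and size in bytes for the expression \a E: for an
/// array shaping expression the element size times all dimensions, for an
/// array section the distance between its bounds, and otherwise the size of
/// the expression's type.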
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t type, if it is not built yet, and builds
/// the flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

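/// Emit the task-creation sequence shared by 'task', 'taskloop' and
/// target-based directives: build kmp_task_t_with_privates and its helper
/// thunks (proxy entry, privates map, optional destructors/task_dup), then
/// allocate and fill the task descriptor, roughly (a sketch):
/// \code
/// kmp_task_t *new_task = __kmpc_omp_task_alloc(
///     &loc, gtid, flags, sizeof(kmp_task_t_with_privates), sizeof(shareds),
///     &.omp_task_entry.);
/// \endcode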
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

3993/// Translates internal dependency kind into the runtime kind.
3994static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3995 RTLDependenceKindTy DepKind;
3996 switch (K) {
3997 case OMPC_DEPEND_in:
3998 DepKind = RTLDependenceKindTy::DepIn;
3999 break;
4000 // Out and InOut dependencies must use the same code.
4001 case OMPC_DEPEND_out:
4002 case OMPC_DEPEND_inout:
4003 DepKind = RTLDependenceKindTy::DepInOut;
4004 break;
4005 case OMPC_DEPEND_mutexinoutset:
4006 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
4007 break;
4008 case OMPC_DEPEND_inoutset:
4009 DepKind = RTLDependenceKindTy::DepInOutSet;
4010 break;
4011 case OMPC_DEPEND_outallmemory:
4012 DepKind = RTLDependenceKindTy::DepOmpAllMem;
4013 break;
4014 case OMPC_DEPEND_source:
4015 case OMPC_DEPEND_sink:
4016 case OMPC_DEPEND_depobj:
4017 case OMPC_DEPEND_inoutallmemory:
4018 case OMPC_DEPEND_unknown:
4019 llvm_unreachable("Unknown task dependence type");
4020 }
4021 return DepKind;
4022}
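
// For example, under the mapping above both of the following source clauses
// lower to the same runtime kind (DepInOut):
//   #pragma omp task depend(out: x)
//   #pragma omp task depend(inout: x)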

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
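
// The record built above corresponds roughly to the runtime's dependence
// descriptor (a sketch; field names and the exact flags width are
// illustrative, not taken from the runtime's own headers):
//   struct kmp_depend_info {
//     intptr_t base_addr; // RTLDependInfoFields::BaseAddr
//     size_t len;         // RTLDependInfoFields::Len
//     unsigned char flags; // RTLDependInfoFields::Flags (bool-sized integer)
//   };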

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[-1].base_addr, i.e. the count stored in the header entry.
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
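
// Layout assumed above (a sketch): a depobj handle points at the first real
// element of an array whose entry at index -1 is a header storing the element
// count in its base_addr field:
//   [ {NumDeps, _, _}, deps[0], deps[1], ..., deps[NumDeps-1] ]
//                      ^-- the depobj handle points here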

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
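
// Worked example (a sketch): for 'depend(in: a)' with 'int a', the loop above
// fills one record roughly as
//   deps[i].base_addr = (intptr_t)&a;
//   deps[i].len = sizeof(int);
//   deps[i].flags = (unsigned)RTLDependenceKindTy::DepIn;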

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // Copy the dependency data with a single memcpy.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos by the number of copied elements:
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
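
// In effect (a sketch): for each depobj expression, the loop above performs
//   memcpy(&DependenciesArray[pos], depobjBase,
//          numDeps * sizeof(kmp_depend_info));
//   pos += numDeps;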

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  // Copy regular dependencies without iterators first.
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
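
// Resulting array layout (a sketch of the ordering established above):
//   [ regular deps without iterators | regular deps with iterators |
//     contents of all depobj arrays ]
// NumOfElements is the total count that the caller passes to the runtime as
// 'ndeps'.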

Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size() + 1>];
  // For depobj, reserve one extra element to store the number of elements.
  // It is required to handle the 'depobj(x) update(in)' construct.
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // The array must be allocated in dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[0].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
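
// Illustration (a sketch): for
//   #pragma omp depobj(o) depend(inout: x)
// the code above allocates 1 + 1 kmp_depend_info entries with __kmpc_alloc,
// stores the count into entry 0, fills entry 1 for 'x', and returns the
// address of entry 1 as the depobj value.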

void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // __kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
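
// Illustration (a sketch): '#pragma omp depobj(o) destroy' lowers to stepping
// back from the stored pointer to the header entry and freeing the whole
// allocation, roughly
//   __kmpc_free(gtid, (kmp_depend_info *)o - 1, /*allocator=*/nullptr);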

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a do-while loop; at least one element is
  // guaranteed to be present.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
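
// Illustration (a sketch): '#pragma omp depobj(o) update(out)' rewrites the
// flags of every stored entry, roughly
//   for (kmp_depend_info *p = o, *e = o + numDeps; p != e; ++p)
//     p->flags = DepInOut; // 'out' and 'inout' share one runtime kind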

void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
  // dependence list is not empty.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if the parent region is untied and build a return point for the
    // untied task.
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
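
// Overall lowering (a sketch): for '#pragma omp task if(cond) depend(...)'
// the code above produces roughly
//   if (cond) {
//     __kmpc_omp_task_with_deps(loc, gtid, new_task, ndeps, deps, 0, nullptr);
//   } else {
//     __kmpc_omp_taskwait_deps_51(loc, gtid, ndeps, deps, 0, nullptr, nowait);
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }
// Without dependences, the then-branch is a plain __kmpc_omp_task call.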

void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by the
  // __kmpc_omp_task_alloc() libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because the taskgroup is emitted by the
                         // compiler.
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
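
// Illustration (a sketch): '#pragma omp taskloop grainsize(4)' reaches the
// call above with sched = Grainsize (1) and a grainsize argument of 4, while
// 'num_tasks(8)' gives sched = NumTasks (2) and a value of 8; with neither
// clause, sched = NoSchedule (0) and the value is 0.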

/// Emit a reduction operation, LHS op = RHS, for each element of an array
/// (required for array sections).
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
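
// The generated control flow corresponds roughly to this C sketch:
//   T *lhs = lhsBegin, *rhs = rhsBegin;
//   if (lhs != lhsEnd) {
//     do {
//       *lhs op= *rhs; // RedOpGen with LHSVar/RHSVar privatized per element
//       ++lhs; ++rhs;
//     } while (lhs != lhsEnd);
//   }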

/// Emit reduction combiner. If the combiner is a simple expression, emit it as
/// is; otherwise treat it as the combiner of a UDR decl and emit it as a call
/// to the UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
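
// A combiner that takes the UDR path above comes from source like this
// (illustrative):
//   #pragma omp declare reduction(merge : std::vector<int> : \
//       omp_out.insert(omp_out.end(), omp_in.begin(), omp_in.end()))
//   #pragma omp parallel for reduction(merge : v)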

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // The following code should be emitted for the reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //   *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //   ...
  //   *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //   *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //   ...
  //   <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //   ...
  //   __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //   break;
  // case 2:
  //   ...
  //   Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //   ...
  //   [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //   break;
  // default:;
  // }
  //
  // If SimpleReduction is true, only the following code is generated:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  // ...
  // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  // ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  // ...
  // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  // ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
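
// Usage illustration (a sketch): for
//   #pragma omp parallel for reduction(+ : sum)
// RedList holds the address of each thread's private copy of 'sum',
// reduce_func combines two such lists, and the switch above selects either
// the tree-reduction path (case 1) or the atomic path (case 2, where each
// 'sum += partial' is emitted as an atomic update).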

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a 'declare reduction'
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

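// Illustrative example (not emitted verbatim): for
//   #pragma omp taskgroup task_reduction(+ : x)
// the initialization below conceptually fills one kmp_taskred_input_t entry
// with &x, sizeof(x) and the generated .red_init./.red_comb./.red_fini.
// functions, then calls __kmpc_taskred_init(gtid, 1, &.rd_input.).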
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because runtime does not provide the way to pass the sizes
    // of VLAs/array sections to initializer/combiner/finalizer functions.
    // Instead threadprivate global variables are used to store these values
    // and use them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the size is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
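// E.g., inside a task body the address of the thread-specific copy of a
// reduction item 'x' is obtained (conceptually) as
//   __kmpc_task_reduction_get_th_data(gtid, tg, &x)
// where 'tg' is the value returned by the taskred init call above.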

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
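// E.g. (illustrative), '#pragma omp taskwait depend(in : a)' lowers to
//   __kmpc_omp_taskwait_deps_51(&loc, gtid, 1, deps, 0, nullptr, 0);
// while a plain '#pragma omp taskwait' becomes
//   __kmpc_omp_taskwait(&loc, gtid);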

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
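// E.g., 'cancel sections' maps to CancelSections (3); the value is passed as
// the cncl_kind argument of __kmpc_cancel/__kmpc_cancellationpoint below.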

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
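// E.g. (illustrative), '#pragma omp cancel parallel if (c)' is emitted as:
//   if (c)
//     if (__kmpc_cancel(&loc, gtid, CancelParallel)) {
//       __kmpc_cancel_barrier(&loc, gtid);
//       goto <parallel region exit>;
//     }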

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
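// E.g. (illustrative), an 'ompx_attribute(__launch_bounds__(128, 2))' clause
// clamps MaxThreadsVal to min(MaxThreadsVal, 128) and raises MinTeamsVal to at
// least 2, on top of the bounds derived from num_teams/num_threads above.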

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
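// E.g., 'x + 1' is trivial (constant-evaluatable or free of non-trivial calls
// and side effects), while a call to an arbitrary user function with possible
// side effects is not.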

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
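// E.g., for
//   #pragma omp target
//   { ; int unused; #pragma omp teams ... }
// the nested teams directive is returned as the single child: null statements,
// trivial expressions and unused local declarations are all ignored.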

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
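// E.g., for '#pragma omp target teams num_teams(8)' this returns the
// num_teams expression and sets MinTeamsVal = MaxTeamsVal = 8; for a bare
// '#pragma omp target' whose body is not a single nested directive it returns
// nullptr with MinTeamsVal = MaxTeamsVal = -1.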

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E or \p CondVal is nullptr, no
/// expression evaluation is performed for it.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
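// E.g. (illustrative), for
//   #pragma omp target parallel num_threads(n) thread_limit(t) if(c)
// the emitted value is min(t, c ? n : 1); when neither clause is present the
// result is 0, meaning "let the runtime choose".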

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
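  // E.g. (illustrative), with OMP_MAP_MEMBER_OF occupying the 16 high bits of
  // a 64-bit flag word (0xFFFF'0000'0000'0000), the loop above counts the
  // trailing zero bits and returns 48.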
6517
6518 /// Class that holds debugging information for a data mapping to be passed to
6519 /// the runtime library.
6520 class MappingExprInfo {
6521 /// The variable declaration used for the data mapping.
6522 const ValueDecl *MapDecl = nullptr;
6523 /// The original expression used in the map clause, or null if there is
6524 /// none.
6525 const Expr *MapExpr = nullptr;
6526
6527 public:
6528 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6529 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6530
6531 const ValueDecl *getMapDecl() const { return MapDecl; }
6532 const Expr *getMapExpr() const { return MapExpr; }
6533 };
6534
6535 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6536 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6537 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6538 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6539 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6540 using MapNonContiguousArrayTy =
6541 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6542 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6543 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6544
6545 /// This structure contains combined information generated for mappable
6546 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6547 /// mappers, and non-contiguous information.
6548 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6549 MapExprsArrayTy Exprs;
6550 MapValueDeclsArrayTy Mappers;
6551 MapValueDeclsArrayTy DevicePtrDecls;
6552
6553 /// Append arrays in \a CurInfo.
6554 void append(MapCombinedInfoTy &CurInfo) {
6555 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
6556 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
6557 in_end: CurInfo.DevicePtrDecls.end());
6558 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
6559 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6560 }
6561 };
6562
6563 /// Map between a struct and the its lowest & highest elements which have been
6564 /// mapped.
6565 /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
6566 /// HE(FieldIndex, Pointer)}
6567 struct StructRangeInfoTy {
6568 MapCombinedInfoTy PreliminaryMapData;
6569 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
6570 0, Address::invalid()};
6571 std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
6572 0, Address::invalid()};
6573 Address Base = Address::invalid();
6574 Address LB = Address::invalid();
6575 bool IsArraySection = false;
6576 bool HasCompleteRecord = false;
6577 };
6578
6579private:
6580 /// Kind that defines how a device pointer has to be returned.
6581 struct MapInfo {
6582 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
6583 OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
6584 ArrayRef<OpenMPMapModifierKind> MapModifiers;
6585 ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
6586 bool ReturnDevicePointer = false;
6587 bool IsImplicit = false;
6588 const ValueDecl *Mapper = nullptr;
6589 const Expr *VarRef = nullptr;
6590 bool ForDeviceAddr = false;
6591
6592 MapInfo() = default;
6593 MapInfo(
6594 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
6595 OpenMPMapClauseKind MapType,
6596 ArrayRef<OpenMPMapModifierKind> MapModifiers,
6597 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
6598 bool ReturnDevicePointer, bool IsImplicit,
6599 const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
6600 bool ForDeviceAddr = false)
6601 : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
6602 MotionModifiers(MotionModifiers),
6603 ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
6604 Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
6605 };
6606
6607 /// If use_device_ptr or use_device_addr is used on a decl which is a struct
6608 /// member and there is no map information about it, then emission of that
6609 /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all firstprivate variables in the current directive.
  /// The bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
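    // For example (illustrative): for 'float *p', the shaping expression
    // ([3][4])p yields 3 * 4 * sizeof(float).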
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
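    // For example (illustrative): for 'int i[100]', the section i[20:30]
    // has size 30 * sizeof(int), not sizeof(int[100]).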
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If the expression has neither a length nor a lower bound, we are
      // using the whole length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizeof(base) - lb * sizeof(elem), clamped to zero below.
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
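  /// For example (illustrative): 'map(always, close, tofrom: x)' yields
  /// OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE.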
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these
      // two type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
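  /// For example (illustrative): for 'int a[10]', 'a[3:1]' is not final (its
  /// length is the constant 1), whereas 'a[3:n]' is, since n might not be 1.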
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent entries
    // do not belong to the base struct. E.g.
    //   struct S2 s;
    //   s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any MemberExprs. If we do
    // not encounter any, it means we will be mapping the whole struct. In that
    // case we need to skip adding an entry for the struct to the CombinedInfo
    // list and instead add an entry to the StructBaseCombinedInfo list only
    // when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be added
        // in the first position before any data internal to the struct is
        // mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list, allocate space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting strides in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value, except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Note that offset, count, and
    // stride are only meaningful for array sections, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of the offset, count, and stride arrays are not the
    // same as those of pointers, base_pointers, sizes, or dims; they instead
    // match the number of non-contiguous declarations in the target update
    // to/from clause.
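    // For example (illustrative): 'target update to(arr[0:2][1:3])'
    // contributes one entry each to Offsets, Counts, and Strides, holding the
    // per-dimension values for the two-dimensional section.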
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all lower
        // dimensions are constructed as array sections too. However, for a
        // case like arr[0:2][2], Clang constructs the inner dimension as an
        // array section even though it is not in array section form according
        // to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //         Offset  Count  Stride
      //    D0   0       1      4      (int)  <- dummy dimension
      //    D1   0       2      8      (2 * (1) * 4)
      //    D2   1       2      20     (1 * (1 * 5) * 4)
      //    D3   0       2      200    (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
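  /// For example (illustrative): a first-private pointer capture yields
  /// OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ, while a first-private non-pointer
  /// capture yields OMP_MAP_PRIVATE | OMP_MAP_TO.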
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first-private variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }

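  /// Compute a flattened list of the non-empty bases and non-bitfield fields
  /// of \a RD, in LLVM record-layout order, recursing into non-virtual and
  /// virtual bases.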
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }


  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user's fault if
    // that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };
7919
7920 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
7921 const Expr *IE, bool IsDevAddr) -> bool {
7922 // We potentially have map information for this declaration already.
7923 // Look for the first set of components that refer to it. If found,
7924 // return true.
7925 // If the first component is a member expression, we have to look into
7926 // 'this', which maps to null in the map of map information. Otherwise
7927 // look directly for the information.
7928 auto It = Info.find(isa<MemberExpr>(Val: IE) ? nullptr : VD);
7929 if (It != Info.end()) {
7930 bool Found = false;
7931 for (auto &Data : It->second) {
7932 auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
7933 return MI.Components.back().getAssociatedDeclaration() == VD;
7934 });
7935 // If we found a map entry, signal that the pointer has to be
7936 // returned and move on to the next declaration. Exclude cases where
7937 // the base pointer is mapped as array subscript, array section or
7938 // array shaping. The base address is passed as a pointer to base in
7939 // this case and cannot be used as a base for use_device_ptr list
7940 // item.
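          // For instance (hypothetical user code), with
          //   #pragma omp target data map(to: p[0:n]) use_device_ptr(p)
          // the matched entry maps the pointee through 'p' rather than the
          // pointer 'p' itself, so it cannot simply be reused for the
          // use_device_ptr item.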
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if it was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated during this iteration,
            // work on the first new entry added to it, i.e. make sure that
            // when multiple values are added to any of the lists, it is the
            // first value added (not the last) that is modified by the
            // assignments below.
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
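      // Illustrative (hypothetical) user code:
      //   struct Base { int B; };
      //   struct S : Base {
      //     int X;
      //     void f() {
      //       #pragma omp target map(X)
      //       { X += B; }  // behaves like map(tofrom: this[:1])
      //     }
      //   };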
      // Emit this[:1].
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element).
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is TARGET_PARAM only when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
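  /// For instance (hypothetical user code):
  /// \code
  /// int X = 0;
  /// auto L = [&X]() { return X; };
  /// #pragma omp target map(to: L)
  /// (void)L();
  /// \endcode
  /// Besides the lambda object itself, the storage captured by reference
  /// ('X' here, plus 'this' for lambdas inside member functions) has to be
  /// mapped as well.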
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of index for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip it here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
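    // Illustrative (hypothetical) user code:
    //   int *P = (int *)omp_target_alloc(N * sizeof(int), Dev);
    //   #pragma omp target is_device_ptr(P)
    //   { P[0] = 1; }
    // 'P' already holds a device address, so it is passed by value.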
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
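    // For example (hypothetical user code), with
    //   #pragma omp target map(tofrom: S) map(to: S.X)
    // the component list for 'S.X' overlaps the one for 'S', so the mapping
    // of 'S' has to be emitted around the bytes covered by 'S.X'.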
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one component list, we reached
        // the head of the list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Same if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements that have overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements, without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI, and captured value \a CV.
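  /// For instance (hypothetical user code), in
  /// \code
  /// int S; double A[10];
  /// #pragma omp target
  /// { A[0] += S; }
  /// \endcode
  /// the scalar 'S' is passed by value as a literal capture while the
  /// aggregate 'A' is implicitly mapped 'tofrom'.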
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime which captures passed by value are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than the first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
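/// For a clause such as map(tofrom: a[0:n]) (hypothetical example), the
/// emitted string contains "a[0:n]" together with the file name, line, and
/// column of the expression, which the runtime can use for diagnostics.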
llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
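/// For example (hypothetical user code), given
/// \code
/// #pragma omp target
/// #pragma omp teams
/// #pragma omp distribute
/// for (int I = 0; I < N; ++I) {}
/// \endcode
/// this returns the nested 'distribute' directive.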
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the mapper's declared variable to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from the user-defined mapper with the
    // one specified in the program. According to the OMP_MAP_TO and
    // OMP_MAP_FROM bits of \a MapType, which is the input argument of the
    // mapper function, the following code sets the OMP_MAP_TO and
    // OMP_MAP_FROM bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
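    // For example, if the mapper is invoked from a 'to' map in the program
    // while a member is declared 'tofrom' in the mapper, the member decays
    // to 'to': its OMP_MAP_FROM bit is cleared below.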
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expected a valid mapper function to be available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
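  // In summary: for the init case the body runs when the mapped region is an
  // array section, or when Base != Begin with PTR_AND_OBJ set, and no delete
  // was requested; for the delete case it runs only for an array section when
  // delete was requested.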
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
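  // Also flag the entry as implicit, since it is generated by the compiler
  // rather than written by the user.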
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

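  // The target region must be wrapped in an outer task if it may run
  // asynchronously or carries dependences: 'depend', 'nowait', or
  // 'in_reduction' clauses, or (since OpenMP 5.1) a 'thread_limit' clause on a
  // directive that needs task-based thread-limit handling.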
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading; otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration, we defer to the canonical definition
  // and do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *Ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(Ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
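  // For example, '#pragma omp requires atomic_default_mem_order(seq_cst)'
  // makes SequentiallyConsistent the default atomic ordering reported by
  // getDefaultMemoryOrdering() below.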
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fall back to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
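  // The OpenMPIRBuilder may invoke the body callback in up to three phases:
  // Priv (body with device-address privatization, used when 'use_device_ptr'
  // or 'use_device_addr' captures exist), DupNoPriv (a duplicate emission
  // without privatization), and NoPriv (no privatization needed at all).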
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {
        RTLoc,
        DeviceID,
        PointerNum,
        InputInfo.BasePointersArray.emitRawPointer(CGF),
        InputInfo.PointersArray.emitRawPointer(CGF),
        InputInfo.SizesArray.emitRawPointer(CGF),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.emitRawPointer(CGF)};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // The CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 ("Registers
  // and the Stack Frame") of the original AMD64 ABI document.
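  // For example, a function returning 'double' (CDT size 64 bits) targeting a
  // 256-bit vector ISA gets VLEN = 256 / 64 = 4 when no 'simdlen' is given.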
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
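/// For example, parameters classified as (uniform, linear with step 2, vector)
/// mangle to "ul2v"; a linear parameter whose variable stride is given by
/// argument number N mangles to "lsN", and an alignment of 16 appends "a16".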
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
10588
10589static void
10590emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10591 const llvm::APSInt &VLENVal,
10592 ArrayRef<ParamAttrTy> ParamAttrs,
10593 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10594 struct ISADataTy {
10595 char ISA;
10596 unsigned VecRegSize;
10597 };
10598 ISADataTy ISAData[] = {
10599 {
10600 .ISA: 'b', .VecRegSize: 128
10601 }, // SSE
10602 {
10603 .ISA: 'c', .VecRegSize: 256
10604 }, // AVX
10605 {
10606 .ISA: 'd', .VecRegSize: 256
10607 }, // AVX2
10608 {
10609 .ISA: 'e', .VecRegSize: 512
10610 }, // AVX512
10611 };
10612 llvm::SmallVector<char, 2> Masked;
10613 switch (State) {
10614 case OMPDeclareSimdDeclAttr::BS_Undefined:
10615 Masked.push_back(Elt: 'N');
10616 Masked.push_back(Elt: 'M');
10617 break;
10618 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10619 Masked.push_back(Elt: 'N');
10620 break;
10621 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10622 Masked.push_back(Elt: 'M');
10623 break;
10624 }
10625 for (char Mask : Masked) {
10626 for (const ISADataTy &Data : ISAData) {
10627 SmallString<256> Buffer;
10628 llvm::raw_svector_ostream Out(Buffer);
10629 Out << "_ZGV" << Data.ISA << Mask;
10630 if (!VLENVal) {
10631 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10632 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10633 Out << llvm::APSInt::getUnsigned(X: Data.VecRegSize / NumElts);
10634 } else {
10635 Out << VLENVal;
10636 }
10637 Out << mangleVectorParameters(ParamAttrs);
10638 Out << '_' << Fn->getName();
10639 Fn->addFnAttr(Kind: Out.str());
10640 }
10641 }
10642}
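// For example (illustrative): for
//   #pragma omp declare simd notinbranch
//   double foo(double x);
// the loops above attach one unmasked variant per ISA, with
// VLEN = VecRegSize / sizeof(CDT): "_ZGVbN2v_foo", "_ZGVcN4v_foo",
// "_ZGVdN4v_foo" and "_ZGVeN8v_foo".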

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
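// For example (illustrative): a uniform parameter of type `double *` does not
// map to a vector, so its LS is the lane size of the pointee, 64 bits; a
// parameter that does map to a vector but is not pass-by-value falls back to
// the pointer width, i.e. LS = sizeof(uintptr_t) in bits.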

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
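// For example (illustrative): for `double foo(float x)` the lane sizes are
// {64, 32}, so NDS = 32 and WDS = 64 (both in bits).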

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
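// The pairs of VLENs emitted above correspond to 64-bit and 128-bit Advanced
// SIMD registers: e.g. NDS == 32 yields VLEN 2 (64 bits) and VLEN 4
// (128 bits), producing names such as "_ZGVnN2..." and "_ZGVnN4..."
// (illustrative).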

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed-length vectors must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
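// For example (illustrative): for `double foo(double x)` with no simdlen and
// no `[not]inbranch` clause, SVE emits the single masked scalable variant
// "_ZGVsMxv_foo", while Advanced SIMD emits "_ZGVnN2v_foo" and
// "_ZGVnM2v_foo" (NDS = WDS = 64 here).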

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
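// For example (illustrative): for
//   #pragma omp declare simd simdlen(4) notinbranch uniform(n) linear(i)
//   float foo(float *p, int i, int n);
// the parameter sequence mangles as "vlu" (vector, linear step 1, uniform),
// and on x86 this attaches attributes such as "_ZGVbN4vlu_foo".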

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
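// For example (illustrative): a loop nest with `ordered(2)` gets a
// two-element `kmp_dim` array holding each loop's upper bound and stride 1,
// a call to __kmpc_doacross_init(loc, gtid, 2, dims), and a cleanup that
// calls __kmpc_doacross_fini on region exit.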

template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
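// For example (illustrative): `#pragma omp ordered depend(source)` lowers to
// __kmpc_doacross_post(loc, gtid, cnt), while
// `#pragma omp ordered depend(sink : i - 1)` lowers to
// __kmpc_doacross_wait(loc, gtid, cnt) with cnt holding the sink iteration.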

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
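// For example (illustrative): for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// the code above replaces the stack slot of `a` with a call to
// __kmpc_alloc(gtid, sizeof(int), allocator) (or __kmpc_aligned_alloc when
// an `align` modifier is present) and registers a matching __kmpc_free
// cleanup; the allocator argument comes from the directive.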

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr =
        CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
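// Thus each lastprivate conditional variable gets a private copy wrapped in
// an implicit struct { T Value; char Fired; }: `Value` holds the private
// value, and `Fired` records whether an inner region has updated it.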

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
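// For example (illustrative): with
//   #pragma omp parallel for lastprivate(conditional : a)
// only the value from the lexically last iteration that actually assigned
// `a` is copied back here, via the internal "last value" global emitted by
// emitLastprivateConditionalUpdate.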
11893
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
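
// Reduction clauses on 'simd' constructs are still honored in SIMD-only mode,
// but only in their "simple" form: the final combine is emitted inline by the
// base class with no runtime library support, so this override just checks
// that invariant and delegates.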
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
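
// Unlike the other target-related entry points, this one must remain
// callable: it is queried for globals during normal emission. Returning
// false means the global needs no OpenMP-specific handling and is emitted
// as usual.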
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
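
// Parameter translation only applies when outlining target regions for a
// device, which never happens in SIMD-only mode.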
const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}