//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGDebugInfo.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
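
// Note (illustrative, not part of the class itself): clients discover the
// active region via LLVM-style RTTI on the captured-statement info, e.g.:
//   if (auto *RI = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
//     if (RI->hasCancel()) { /* emit cancellation-aware code */ }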

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
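
  // Sketch (illustrative) of the dispatch this action builds for an untied
  // task body with two scheduling parts:
  //   switch (*part_id) {           // emitted by Enter()
  //   case 0: goto .untied.jmp.0;   // initial entry
  //   case 1: goto .untied.jmp.1;   // added by emitUntiedSwitch()
  //   default: goto .untied.done.;  // task already finished
  //   }
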
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look the
    // variable up in a list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application, so it is provided by the client, because only the client has
/// the information to generate it.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
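
// Typical use (illustrative sketch, not a verbatim caller): scope an inlined
// region around the emission of a construct body so lookups resolve through
// the outer region while the RAII object is alive.
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical, HasCancel);
//     CGF.EmitStmt(Body);
//   } // original CapturedStmtInfo restored here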

/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMB = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
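
// Example (illustrative): the ident_t emitted for the implicit barrier at the
// end of a 'sections' construct would carry
//   OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_SECTIONS
// in its flags field (the KMPC bit is added when the ident is materialized).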

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                            */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
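
// Example (illustrative): a clause like 'schedule(nonmonotonic: dynamic, 4)'
// is encoded for the runtime as the base schedule ORed with a modifier bit:
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic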

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
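
// Illustrative use: attaching an action so its Enter()/Exit() hooks bracket
// the generated region (Exit() runs via the CleanupTy pushed above):
//   RegionCodeGenTy RCG(SomeCodeGenCallback); // hypothetical callback
//   RCG.setAction(Action);
//   RCG(CGF);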

/// Check if the combiner is a call to UDR combiner and if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
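
// Control flow sketch (illustrative) of the loop emitted above:
//   dest = &DestAddr[0]; end = dest + NumElements;
//   if (dest == end) goto omp.arrayinit.done;
//   omp.arrayinit.body:
//     initialize(*dest /*, *src for declare-reduction inits */);
//     ++dest; /* ++src; */
//     if (dest != end) goto omp.arrayinit.body;
//   omp.arrayinit.done: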

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
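
// Source-level reference (illustrative): a user-defined reduction such as
//   #pragma omp declare reduction(mymax : int :
//       omp_out = omp_out > omp_in ? omp_out : omp_in)
//       initializer(omp_priv = INT_MIN)
// results in one combiner helper and one initializer helper being emitted
// through the function above.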

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of Clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed, but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
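
// The generated helper follows the kmpc microtask convention (illustrative
// signature; the trailing capture parameters depend on the region):
//   void <name>.omp_outlined(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                            /* captured variables... */);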

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt->getIterator());
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
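
// Example (illustrative): for a construct at line 4, column 7 of t.c inside
// function foo, the string built above is ";t.c;foo;4;7;;".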

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
1395
1396llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
1397 SourceLocation Loc) {
1398 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1399 // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
1400 // the clang invariants used below might be broken.
1401 if (CGM.getLangOpts().OpenMPIRBuilder) {
1402 SmallString<128> Buffer;
1403 OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
1404 uint32_t SrcLocStrSize;
1405 auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
1406 LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
1407 return OMPBuilder.getOrCreateThreadID(
1408 Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
1409 }
1410
1411 llvm::Value *ThreadID = nullptr;
1412 // Check whether we've already cached a load of the thread id in this
1413 // function.
1414 auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
1415 if (I != OpenMPLocThreadIDMap.end()) {
1416 ThreadID = I->second.ThreadID;
1417 if (ThreadID != nullptr)
1418 return ThreadID;
1419 }
1420 // If exceptions are enabled, do not use parameter to avoid possible crash.
1421 if (auto *OMPRegionInfo =
1422 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
1423 if (OMPRegionInfo->getThreadIDVariable()) {
1424 // Check if this an outlined function with thread id passed as argument.
1425 LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
1426 llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
1427 if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
1428 !CGF.getLangOpts().CXXExceptions ||
1429 CGF.Builder.GetInsertBlock() == TopBlock ||
1430 !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
1431 cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
1432 TopBlock ||
1433 cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
1434 CGF.Builder.GetInsertBlock()) {
1435 ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
1436 // If value loaded in entry block, cache it and use it everywhere in
1437 // function.
1438 if (CGF.Builder.GetInsertBlock() == TopBlock)
1439 OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
1440 return ThreadID;
1441 }
1442 }
1443 }
1444
1445 // This is not an outlined function region - need to call __kmpc_int32
1446 // kmpc_global_thread_num(ident_t *loc).
1447 // Generate thread id value and cache this value for use across the
1448 // function.
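  // The emitted code looks roughly like (illustrative):
  //   %svcpt = bitcast i32 undef to i32  ; service insert point after allocas
  //   %0 = call i32 @__kmpc_global_thread_num(ptr @loc)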
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (auto I = FunctionUDRMap.find(CGF.CurFn); I != FunctionUDRMap.end()) {
    for (const auto *D : I->second)
      UDRMap.erase(D);
    FunctionUDRMap.erase(I);
  }
  if (auto I = FunctionUDMMap.find(CGF.CurFn); I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
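
// Note on getAddrOfThreadPrivate (illustrative): on targets without TLS
// support, the returned address comes from a runtime call of the form
//   %0 = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %gtid, ptr %var,
//                                              i64 %size, ptr @var.cache)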

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register the constructor/destructor for the variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit the init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // The copying constructor for the threadprivate variable is reserved by
    // the runtime, which currently asserts unless this parameter is NULL, so
    // always pass NULL.
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
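
// Note on emitThreadPrivateVarDefinition (illustrative): given
//   struct S { S(); ~S(); };
//   S GlobalS;
//   #pragma omp threadprivate(GlobalS)
// this emits __kmpc_global_ctor_/__kmpc_global_dtor_ wrappers and registers
// them via __kmpc_threadprivate_register from a synthesized
// __omp_threadprivate_init_ function.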

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          CGF.Builder.getPtrTy(0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit a line number for the unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
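
// Note on emitIfClause: when the condition does not fold, the emitted control
// flow is the usual diamond (illustrative):
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms branching to %omp_if.end.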

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C=*/0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // The thread id for serialized parallel regions is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, that every data environment starts with a
    // new function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
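
// Note on emitParallelCall (illustrative): '#pragma omp parallel if(cond)'
// lowers to roughly
//   if (cond) {
//     __kmpc_fork_call(&loc, n, outlined, captured...);
//   } else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     outlined(&gtid, &.bound.zero.addr, captured...);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }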

// If we're inside an (outlined) parallel region, use the region info's
// thread-id variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, i.e. in regular serial code, get
// the thread id by calling kmp_int32 __kmpc_global_thread_num(ident_t *loc),
// stash it in a temporary, and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name=*/".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (if-stmt).
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of the blocks/branches.
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical.
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
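
// Note on emitCriticalRegion (illustrative):
// '#pragma omp critical (lck) hint(omp_sync_hint_contended)' enters via
// __kmpc_critical_with_hint(&loc, gtid, &.gomp_critical_user_lck.var, hint)
// and leaves via __kmpc_end_critical.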

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked.
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single.
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  //                         <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build a function that copies the private values from the single region
    // to all other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
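
// Note on emitSingleRegion (illustrative): '#pragma omp single
// copyprivate(a, b)' broadcasts the values of 'a' and 'b' from the thread
// that executed the single region to the other threads of the team via
// __kmpc_copyprivate.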

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered.
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In
  // this case choose schedule(static, 1).
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // The chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder.
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*EmitLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
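
// E.g. getRuntimeSchedule maps 'schedule(dynamic, 4)' to
// OMP_sch_dynamic_chunked, and 'schedule(static)' on an ordered loop to
// OMP_ord_static (illustrative).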

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // Only static is allowed for dist_schedule.
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect
  // is as if the monotonic modifier is specified. Otherwise, unless the
  // monotonic modifier is specified, the effect is as if the nonmonotonic
  // modifier is specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
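
// E.g. addMonoNonMonoModifier returns
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic for
// 'schedule(nonmonotonic: dynamic, 4)'; under OpenMP >= 5.0 a plain
// 'schedule(dynamic)' gets the nonmonotonic modifier as well (illustrative).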

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the chunk was not specified in the clause, use the default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
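
// Note on emitForDispatchInit (illustrative): for a 32-bit signed induction
// variable this emits
//   call void @__kmpc_dispatch_init_4(ptr @loc, i32 %gtid, i32 %sched,
//                                     i32 %lb, i32 %ub, i32 1, i32 %chunk)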

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the chunk was not specified in the clause, use the default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
2715
2716void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2717 ProcBindKind ProcBind,
2718 SourceLocation Loc) {
2719 if (!CGF.HaveInsertPoint())
2720 return;
2721 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2722 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2723 llvm::Value *Args[] = {
2724 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2725 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2726 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2727 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2728 args: Args);
2729}
2730
2731void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2732 SourceLocation Loc, llvm::AtomicOrdering AO) {
2733 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2734 OMPBuilder.createFlush(Loc: CGF.Builder);
2735 } else {
2736 if (!CGF.HaveInsertPoint())
2737 return;
2738 // Build call void __kmpc_flush(ident_t *loc)
2739 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2740 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2741 args: emitUpdateLocation(CGF, Loc));
2742 }
2743}
2744
2745namespace {
2746/// Indexes of fields for type kmp_task_t.
2747enum KmpTaskTFields {
2748 /// List of shared variables.
2749 KmpTaskTShareds,
2750 /// Task routine.
2751 KmpTaskTRoutine,
2752 /// Partition id for the untied tasks.
2753 KmpTaskTPartId,
2754 /// Function with call of destructors for private variables.
2755 Data1,
2756 /// Task priority.
2757 Data2,
2758 /// (Taskloops only) Lower bound.
2759 KmpTaskTLowerBound,
2760 /// (Taskloops only) Upper bound.
2761 KmpTaskTUpperBound,
2762 /// (Taskloops only) Stride.
2763 KmpTaskTStride,
2764 /// (Taskloops only) Is last iteration flag.
2765 KmpTaskTLastIter,
2766 /// (Taskloops only) Reduction data.
2767 KmpTaskTReductions,
2768};
2769} // anonymous namespace
2770
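// The collected offload entries are emitted into the module as
// '!omp_offload.info' metadata, roughly of the following shape for a target
// region (illustrative sketch; the OpenMPIRBuilder owns the exact field list):
// \code
// !omp_offload.info = !{!0, ...}
// !0 = !{i32 0, i32 <device-id>, i32 <file-id>, !"<parent-name>",
//        i32 <line>, ..., i32 <order>}
// \endcode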
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

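/// Returns true if \a VD has an 'omp allocate' attribute that requires going
/// through the OpenMP allocator runtime, i.e. anything except the default
/// allocator with no allocator expression.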
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Only the default allocator with no allocator expression uses the default
  // allocation; everything else must go through the allocator runtime.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //   /* private vars */
    // };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //   void *shareds;
  //   kmp_routine_entry_t routine;
  //   kmp_int32 part_id;
  //   kmp_cmplrdata_t data1;
  //   kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //   kmp_uint64 lb;
  //   kmp_uint64 ub;
  //   kmp_int64 st;
  //   kmp_int32 liter;
  //   void *reductions;
  // };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //   kmp_task_t task_data;
  //   .kmp_privates_t. privates;
  // };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
/// For taskloops:
/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
/// tt->reductions, tt->shareds);
/// return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

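/// Emits the task destructor thunk that the runtime calls for tasks created
/// with DestructorsFlag set. A rough sketch of its behavior (illustrative,
/// not the exact emitted code):
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   // run the destructor of every field of tt->privates that needs one
/// }
/// \endcode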
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
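/// \param ForDup true when the initializers are emitted inside the taskloop
/// task-duplication ('dup') thunk, false when initializing the task freshly
/// created by __kmpc_omp_task_alloc.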
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Checks whether a task-duplication function is required for taskloops, i.e.
/// whether any private copy has a non-trivial (constructor) initializer.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of-iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace

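/// Computes the base pointer and the size in bytes of the object denoted by
/// \a E, handling OpenMP array-shaping expressions and array sections as well
/// as plain lvalues.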
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // Build struct kmp_task_affinity_info_t {
    //   kmp_intptr_t base_addr;
    //   size_t len;
    //   <32-bit unsigned> flags;
    // };
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}

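// For a plain 'task' directive the sequence emitted below amounts to, roughly
// (pseudo-code sketch; flags, privates and the individual clauses are handled
// as described in the comments that follow):
// \code
// kmp_task_t *new_task = __kmpc_omp_task_alloc(
//     &loc, gtid, flags, sizeof(kmp_task_t_with_privates), sizeof(shareds),
//     &.omp_task_entry.);
// memcpy(new_task->shareds, <captured shareds>, sizeof(shareds));
// // then: initialize private copies, destructor thunk, priority, ...
// \endcode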
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit the privates mapping function (if there are any privates).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc), TaskFlags,
      KmpTaskTWithPrivatesTySize, SharedsSize,
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskEntry,
                                                      KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0);
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}

3976/// Translates internal dependency kind into the runtime kind.
3977static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3978 RTLDependenceKindTy DepKind;
3979 switch (K) {
3980 case OMPC_DEPEND_in:
3981 DepKind = RTLDependenceKindTy::DepIn;
3982 break;
3983 // Out and InOut dependencies must use the same code.
3984 case OMPC_DEPEND_out:
3985 case OMPC_DEPEND_inout:
3986 DepKind = RTLDependenceKindTy::DepInOut;
3987 break;
3988 case OMPC_DEPEND_mutexinoutset:
3989 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3990 break;
3991 case OMPC_DEPEND_inoutset:
3992 DepKind = RTLDependenceKindTy::DepInOutSet;
3993 break;
3994 case OMPC_DEPEND_outallmemory:
3995 DepKind = RTLDependenceKindTy::DepOmpAllMem;
3996 break;
3997 case OMPC_DEPEND_source:
3998 case OMPC_DEPEND_sink:
3999 case OMPC_DEPEND_depobj:
4000 case OMPC_DEPEND_inoutallmemory:
4001 case OMPC_DEPEND_unknown:
4002 llvm_unreachable("Unknown task dependence type");
4003 }
4004 return DepKind;
4005}
4006
4007 /// Builds the kmp_depend_info type, if it is not built yet, and the flags type.
4008static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4009 QualType &FlagsTy) {
4010 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(C.BoolTy), /*Signed=*/false);
4011 if (KmpDependInfoTy.isNull()) {
4012 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4013 KmpDependInfoRD->startDefinition();
4014 addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
4015 addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
4016 addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
4017 KmpDependInfoRD->completeDefinition();
4018 KmpDependInfoTy = C.getRecordType(Decl: KmpDependInfoRD);
4019 }
4020}
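// Expressed as C, the record built above corresponds to the runtime's
// dependence descriptor (a sketch; field names follow the
// RTLDependInfoFields order used by the accessors below):
//   typedef struct kmp_depend_info {
//     intptr_t base_addr; // address of the dependent object
//     size_t len;         // length in bytes
//     uint8_t flags;      // dependence kind (DepIn, DepInOut, ...)
//   } kmp_depend_info_t;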
4021
4022std::pair<llvm::Value *, LValue>
4023CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
4024 SourceLocation Loc) {
4025 ASTContext &C = CGM.getContext();
4026 QualType FlagsTy;
4027 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4028 RecordDecl *KmpDependInfoRD =
4029 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4030 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4031 LValue Base = CGF.EmitLoadOfPointerLValue(
4032 Ptr: DepobjLVal.getAddress().withElementType(
4033 ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
4034 PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
4035 Address DepObjAddr = CGF.Builder.CreateGEP(
4036 CGF, Addr: Base.getAddress(),
4037 Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
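// A depobj handle points one element past a hidden counter slot, i.e. the
// allocation laid out by emitDepobjDependClause is (a sketch):
//   [ counter | dep 0 | dep 1 | ... | dep N-1 ]
//               ^-- DepobjLVal points here
// so stepping back by one element reaches the counter, whose base_addr
// field holds N.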
4038 LValue NumDepsBase = CGF.MakeAddrLValue(
4039 DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
4040 // NumDeps = deps[-1].base_addr;
4041 LValue BaseAddrLVal = CGF.EmitLValueForField(
4042 Base: NumDepsBase,
4043 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4044 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4045 llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
4046 return std::make_pair(x&: NumDeps, y&: Base);
4047}
4048
4049static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4050 llvm::PointerUnion<unsigned *, LValue *> Pos,
4051 const OMPTaskDataTy::DependData &Data,
4052 Address DependenciesArray) {
4053 CodeGenModule &CGM = CGF.CGM;
4054 ASTContext &C = CGM.getContext();
4055 QualType FlagsTy;
4056 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4057 RecordDecl *KmpDependInfoRD =
4058 cast<RecordDecl>(Val: KmpDependInfoTy->getAsTagDecl());
4059 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
4060
4061 OMPIteratorGeneratorScope IteratorScope(
4062 CGF, cast_or_null<OMPIteratorExpr>(
4063 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4064 : nullptr));
4065 for (const Expr *E : Data.DepExprs) {
4066 llvm::Value *Addr;
4067 llvm::Value *Size;
4068
4069 // The expression will be a nullptr in the 'omp_all_memory' case.
4070 if (E) {
4071 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
4072 Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
4073 } else {
4074 Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4075 Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
4076 }
4077 LValue Base;
4078 if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
4079 Base = CGF.MakeAddrLValue(
4080 Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
4081 } else {
4082 assert(E && "Expected a non-null expression");
4083 LValue &PosLVal = *cast<LValue *>(Val&: Pos);
4084 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4085 Base = CGF.MakeAddrLValue(
4086 Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
4087 }
4088 // deps[i].base_addr = &<Dependencies[i].second>;
4089 LValue BaseAddrLVal = CGF.EmitLValueForField(
4090 Base,
4091 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4092 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4093 CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
4094 // deps[i].len = sizeof(<Dependencies[i].second>);
4095 LValue LenLVal = CGF.EmitLValueForField(
4096 Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
4097 n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
4098 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
4099 // deps[i].flags = <Dependencies[i].first>;
4100 RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
4101 LValue FlagsLVal = CGF.EmitLValueForField(
4102 Base,
4103 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4104 n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4105 CGF.EmitStoreOfScalar(
4106 value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
4107 lvalue: FlagsLVal);
4108 if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
4109 ++(*P);
4110 } else {
4111 LValue &PosLVal = *cast<LValue *>(Val&: Pos);
4112 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4113 Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
4114 RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
4115 CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
4116 }
4117 }
4118}
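// For example, for 'depend(in : a) depend(inout : b)' emitDependData fills
// the array as if by (a sketch):
//   deps[pos++] = { (intptr_t)&a, sizeof(a), (uint8_t)DepIn };
//   deps[pos++] = { (intptr_t)&b, sizeof(b), (uint8_t)DepInOut };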
4119
4120SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
4121 CodeGenFunction &CGF, QualType &KmpDependInfoTy,
4122 const OMPTaskDataTy::DependData &Data) {
4123 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4124 "Expected depobj dependency kind.");
4125 SmallVector<llvm::Value *, 4> Sizes;
4126 SmallVector<LValue, 4> SizeLVals;
4127 ASTContext &C = CGF.getContext();
4128 {
4129 OMPIteratorGeneratorScope IteratorScope(
4130 CGF, cast_or_null<OMPIteratorExpr>(
4131 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4132 : nullptr));
4133 for (const Expr *E : Data.DepExprs) {
4134 llvm::Value *NumDeps;
4135 LValue Base;
4136 LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
4137 std::tie(NumDeps, Base) =
4138 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4139 LValue NumLVal = CGF.MakeAddrLValue(
4140 Addr: CGF.CreateMemTemp(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
4141 T: C.getUIntPtrType());
4142 CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
4143 Addr: NumLVal.getAddress());
4144 llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
4145 llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
4146 CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
4147 SizeLVals.push_back(Elt: NumLVal);
4148 }
4149 }
4150 for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
4151 llvm::Value *Size =
4152 CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
4153 Sizes.push_back(Elt: Size);
4154 }
4155 return Sizes;
4156}
4157
4158void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
4159 QualType &KmpDependInfoTy,
4160 LValue PosLVal,
4161 const OMPTaskDataTy::DependData &Data,
4162 Address DependenciesArray) {
4163 assert(Data.DepKind == OMPC_DEPEND_depobj &&
4164 "Expected depobj dependency kind.");
4165 llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
4166 {
4167 OMPIteratorGeneratorScope IteratorScope(
4168 CGF, cast_or_null<OMPIteratorExpr>(
4169 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
4170 : nullptr));
4171 for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
4172 const Expr *E = Data.DepExprs[I];
4173 llvm::Value *NumDeps;
4174 LValue Base;
4175 LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
4176 std::tie(NumDeps, Base) =
4177 getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
4178
4179 // Memcpy the dependency data.
4180 llvm::Value *Size = CGF.Builder.CreateNUWMul(
4181 LHS: ElSize,
4182 RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
4183 llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
4184 Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
4185 CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);
4186
4187 // Increase pos.
4188 // pos += numDeps;
4189 llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
4190 CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
4191 }
4192 }
4193}
4194
4195std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
4196 CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
4197 SourceLocation Loc) {
4198 if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
4199 return D.DepExprs.empty();
4200 }))
4201 return std::make_pair(x: nullptr, y: Address::invalid());
4202 // Process list of dependencies.
4203 ASTContext &C = CGM.getContext();
4204 Address DependenciesArray = Address::invalid();
4205 llvm::Value *NumOfElements = nullptr;
4206 unsigned NumDependencies = std::accumulate(
4207 first: Dependencies.begin(), last: Dependencies.end(), init: 0,
4208 binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
4209 return D.DepKind == OMPC_DEPEND_depobj
4210 ? V
4211 : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
4212 });
4213 QualType FlagsTy;
4214 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4215 bool HasDepobjDeps = false;
4216 bool HasRegularWithIterators = false;
4217 llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4218 llvm::Value *NumOfRegularWithIterators =
4219 llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
4220 // Calculate number of depobj dependencies and regular deps with the
4221 // iterators.
4222 for (const OMPTaskDataTy::DependData &D : Dependencies) {
4223 if (D.DepKind == OMPC_DEPEND_depobj) {
4224 SmallVector<llvm::Value *, 4> Sizes =
4225 emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
4226 for (llvm::Value *Size : Sizes) {
4227 NumOfDepobjElements =
4228 CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
4229 }
4230 HasDepobjDeps = true;
4231 continue;
4232 }
4233 // Include the number of iterations, if any.
4235 if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
4236 llvm::Value *ClauseIteratorSpace =
4237 llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
4238 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4239 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
4240 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
4241 ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
4242 }
4243 llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
4244 LHS: ClauseIteratorSpace,
4245 RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
4246 NumOfRegularWithIterators =
4247 CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
4248 HasRegularWithIterators = true;
4249 continue;
4250 }
4251 }
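// For example, 'depend(iterator(i = 0 : n), in : a[i])' contributes
// n * 1 elements to NumOfRegularWithIterators, so the total size of the
// dependence array is known only at run time and a VLA is emitted below.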
4252
4253 QualType KmpDependInfoArrayTy;
4254 if (HasDepobjDeps || HasRegularWithIterators) {
4255 NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
4256 /*isSigned=*/IsSigned: false);
4257 if (HasDepobjDeps) {
4258 NumOfElements =
4259 CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
4260 }
4261 if (HasRegularWithIterators) {
4262 NumOfElements =
4263 CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
4264 }
4265 auto *OVE = new (C) OpaqueValueExpr(
4266 Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
4267 VK_PRValue);
4268 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
4269 RValue::get(V: NumOfElements));
4270 KmpDependInfoArrayTy =
4271 C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
4272 /*IndexTypeQuals=*/0);
4273 // Properly emit the variably-sized array (rather than calling
4274 // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy) directly).
4275 auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
4276 ParamKind: ImplicitParamKind::Other);
4277 CGF.EmitVarDecl(*PD);
4278 DependenciesArray = CGF.GetAddrOfLocalVar(PD);
4279 NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
4280 /*isSigned=*/false);
4281 } else {
4282 KmpDependInfoArrayTy = C.getConstantArrayType(
4283 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
4284 ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4285 DependenciesArray =
4286 CGF.CreateMemTemp(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
4287 DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
4288 NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
4289 /*isSigned=*/IsSigned: false);
4290 }
4291 unsigned Pos = 0;
4292 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4293 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4294 Dependencies[I].IteratorExpr)
4295 continue;
4296 emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
4297 DependenciesArray);
4298 }
4299 // Copy regular dependencies with iterators.
4300 LValue PosLVal = CGF.MakeAddrLValue(
4301 Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "dep.counter.addr"), T: C.getSizeType());
4302 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
4303 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4304 if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
4305 !Dependencies[I].IteratorExpr)
4306 continue;
4307 emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
4308 DependenciesArray);
4309 }
4310 // Copy final depobj arrays without iterators.
4311 if (HasDepobjDeps) {
4312 for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
4313 if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
4314 continue;
4315 emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
4316 DependenciesArray);
4317 }
4318 }
4319 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4320 Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
4321 return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
4322}
4323
4324Address CGOpenMPRuntime::emitDepobjDependClause(
4325 CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
4326 SourceLocation Loc) {
4327 if (Dependencies.DepExprs.empty())
4328 return Address::invalid();
4329 // Process list of dependencies.
4330 ASTContext &C = CGM.getContext();
4331 Address DependenciesArray = Address::invalid();
4332 unsigned NumDependencies = Dependencies.DepExprs.size();
4333 QualType FlagsTy;
4334 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4335 RecordDecl *KmpDependInfoRD =
4336 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4337
4338 llvm::Value *Size;
4339 // Define type kmp_depend_info[<Dependencies.size()> + 1];
4340 // For depobj, reserve one extra element to store the number of elements.
4341 // It is required to handle the depobj(x) update(in) construct.
4342 // kmp_depend_info[<Dependencies.size()> + 1] deps;
4343 llvm::Value *NumDepsVal;
4344 CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
4345 if (const auto *IE =
4346 cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
4347 NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
4348 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
4349 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
4350 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
4351 NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
4352 }
4353 Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
4354 RHS: NumDepsVal);
4355 CharUnits SizeInBytes =
4356 C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
4357 llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
4358 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
4359 NumDepsVal =
4360 CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
4361 } else {
4362 QualType KmpDependInfoArrayTy = C.getConstantArrayType(
4363 KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
4364 nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
4365 CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
4366 Size = CGM.getSize(numChars: Sz.alignTo(Align));
4367 NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
4368 }
4369 // The dependency array needs to be allocated dynamically.
4370 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4371 // Use default allocator.
4372 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4373 llvm::Value *Args[] = {ThreadID, Size, Allocator};
4374
4375 llvm::Value *Addr =
4376 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4377 M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
4378 args: Args, name: ".dep.arr.addr");
4379 llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
4380 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4381 V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
4382 DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
4383 // Write the number of elements into the first array element for depobj.
4384 LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
4385 // deps[0].base_addr = <number of elements>;
4386 LValue BaseAddrLVal = CGF.EmitLValueForField(
4387 Base,
4388 Field: *std::next(x: KmpDependInfoRD->field_begin(),
4389 n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
4390 CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
4391 llvm::PointerUnion<unsigned *, LValue *> Pos;
4392 unsigned Idx = 1;
4393 LValue PosLVal;
4394 if (Dependencies.IteratorExpr) {
4395 PosLVal = CGF.MakeAddrLValue(
4396 Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "iterator.counter.addr"),
4397 T: C.getSizeType());
4398 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
4399 /*IsInit=*/isInit: true);
4400 Pos = &PosLVal;
4401 } else {
4402 Pos = &Idx;
4403 }
4404 emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
4405 DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4406 Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
4407 ElementTy: CGF.Int8Ty);
4408 return DependenciesArray;
4409}
4410
4411void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4412 SourceLocation Loc) {
4413 ASTContext &C = CGM.getContext();
4414 QualType FlagsTy;
4415 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4416 LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
4417 PtrTy: C.VoidPtrTy.castAs<PointerType>());
4418 QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
4419 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4420 Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
4421 CGF.ConvertTypeForMem(KmpDependInfoTy));
4422 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4423 Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
4424 IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4425 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
4426 DestTy: CGF.VoidPtrTy);
4427 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4428 // Use default allocator.
4429 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4430 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4431
4432 // __kmpc_free(gtid, addr, nullptr);
4433 (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4434 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
4435 args: Args);
4436}
4437
4438void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
4439 OpenMPDependClauseKind NewDepKind,
4440 SourceLocation Loc) {
4441 ASTContext &C = CGM.getContext();
4442 QualType FlagsTy;
4443 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4444 RecordDecl *KmpDependInfoRD =
4445 cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
4446 llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
4447 llvm::Value *NumDeps;
4448 LValue Base;
4449 std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);
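// For example, '#pragma omp depobj(o) update(in)' walks all NumDeps
// entries recorded in 'o' and rewrites each deps[i].flags to DepIn.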
4450
4451 Address Begin = Base.getAddress();
4452 // Compute the pointer past the last element.
4453 llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
4454 Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
4455 // The basic structure here is a do-while loop.
4456 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
4457 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
4458 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4459 CGF.EmitBlock(BB: BodyBB);
4460 llvm::PHINode *ElementPHI =
4461 CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
4462 ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
4463 Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
4464 Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
4465 Base.getTBAAInfo());
4466 // deps[i].flags = NewDepKind;
4467 RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
4468 LValue FlagsLVal = CGF.EmitLValueForField(
4469 Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
4470 n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
4471 CGF.EmitStoreOfScalar(
4472 value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
4473 lvalue: FlagsLVal);
4474
4475 // Shift the address forward by one element.
4476 llvm::Value *ElementNext =
4477 CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
4478 .emitRawPointer(CGF);
4479 ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
4480 llvm::Value *IsEmpty =
4481 CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
4482 CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
4483 // Done.
4484 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
4485}
4486
4487void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
4488 const OMPExecutableDirective &D,
4489 llvm::Function *TaskFunction,
4490 QualType SharedsTy, Address Shareds,
4491 const Expr *IfCond,
4492 const OMPTaskDataTy &Data) {
4493 if (!CGF.HaveInsertPoint())
4494 return;
4495
4496 TaskResultTy Result =
4497 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4498 llvm::Value *NewTask = Result.NewTask;
4499 llvm::Function *TaskEntry = Result.TaskEntry;
4500 llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
4501 LValue TDBase = Result.TDBase;
4502 const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
4503 // Process list of dependences.
4504 Address DependenciesArray = Address::invalid();
4505 llvm::Value *NumOfElements;
4506 std::tie(args&: NumOfElements, args&: DependenciesArray) =
4507 emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
4508
4509 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4510 // libcall.
4511 // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
4512 // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
4513 // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if the
4514 // dependence list is not empty.
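// In outline, the emitted control flow looks like this (a sketch):
//   if (<IfCond>) {  // or unconditionally, without an if clause
//     __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
//   } else {
//     __kmpc_omp_taskwait_deps_51(...); // only with dependences
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task); // serialized direct call
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }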
4515 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4516 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4517 llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
4518 llvm::Value *DepTaskArgs[7];
4519 if (!Data.Dependences.empty()) {
4520 DepTaskArgs[0] = UpLoc;
4521 DepTaskArgs[1] = ThreadID;
4522 DepTaskArgs[2] = NewTask;
4523 DepTaskArgs[3] = NumOfElements;
4524 DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
4525 DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
4526 DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4527 }
4528 auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
4529 &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
4530 if (!Data.Tied) {
4531 auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
4532 LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
4533 CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
4534 }
4535 if (!Data.Dependences.empty()) {
4536 CGF.EmitRuntimeCall(
4537 callee: OMPBuilder.getOrCreateRuntimeFunction(
4538 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
4539 args: DepTaskArgs);
4540 } else {
4541 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4542 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
4543 args: TaskArgs);
4544 }
4545 // Check if the parent region is untied and build a return for the untied task.
4546 if (auto *Region =
4547 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
4548 Region->emitUntiedSwitch(CGF);
4549 };
4550
4551 llvm::Value *DepWaitTaskArgs[7];
4552 if (!Data.Dependences.empty()) {
4553 DepWaitTaskArgs[0] = UpLoc;
4554 DepWaitTaskArgs[1] = ThreadID;
4555 DepWaitTaskArgs[2] = NumOfElements;
4556 DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
4557 DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
4558 DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4559 DepWaitTaskArgs[6] =
4560 llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
4561 }
4562 auto &M = CGM.getModule();
4563 auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
4564 TaskEntry, &Data, &DepWaitTaskArgs,
4565 Loc](CodeGenFunction &CGF, PrePostActionTy &) {
4566 CodeGenFunction::RunCleanupsScope LocalScope(CGF);
4567 // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
4568 // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
4569 // kmp_depend_info_t *noalias_dep_list, kmp_int32 has_no_wait); emitted if
4570 // dependence info is specified.
4571 if (!Data.Dependences.empty())
4572 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4573 M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
4574 args: DepWaitTaskArgs);
4575 // Call proxy_task_entry(gtid, new_task);
4576 auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
4577 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
4578 Action.Enter(CGF);
4579 llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
4580 CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
4581 Args: OutlinedFnArgs);
4582 };
4583
4584 // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
4585 // kmp_task_t *new_task);
4586 // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
4587 // kmp_task_t *new_task);
4588 RegionCodeGenTy RCG(CodeGen);
4589 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
4590 M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
4591 TaskArgs,
4592 OMPBuilder.getOrCreateRuntimeFunction(
4593 M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
4594 TaskArgs);
4595 RCG.setAction(Action);
4596 RCG(CGF);
4597 };
4598
4599 if (IfCond) {
4600 emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
4601 } else {
4602 RegionCodeGenTy ThenRCG(ThenCodeGen);
4603 ThenRCG(CGF);
4604 }
4605}
4606
4607void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
4608 const OMPLoopDirective &D,
4609 llvm::Function *TaskFunction,
4610 QualType SharedsTy, Address Shareds,
4611 const Expr *IfCond,
4612 const OMPTaskDataTy &Data) {
4613 if (!CGF.HaveInsertPoint())
4614 return;
4615 TaskResultTy Result =
4616 emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
4617 // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
4618 // libcall.
4619 // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
4620 // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
4621 // sched, kmp_uint64 grainsize, void *task_dup);
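// For example, '#pragma omp taskloop grainsize(4)' lowers to sched = 1
// (Grainsize) with grainsize = 4, 'num_tasks(8)' to sched = 2 (NumTasks)
// with grainsize = 8, and no clause to sched = 0 (NoSchedule); see the
// enum and the TaskArgs computation below.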
4622 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4623 llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
4624 llvm::Value *IfVal;
4625 if (IfCond) {
4626 IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
4627 /*isSigned=*/true);
4628 } else {
4629 IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
4630 }
4631
4632 LValue LBLVal = CGF.EmitLValueForField(
4633 Base: Result.TDBase,
4634 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
4635 const auto *LBVar =
4636 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
4637 CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
4638 /*IsInitializer=*/true);
4639 LValue UBLVal = CGF.EmitLValueForField(
4640 Base: Result.TDBase,
4641 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
4642 const auto *UBVar =
4643 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
4644 CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
4645 /*IsInitializer=*/true);
4646 LValue StLVal = CGF.EmitLValueForField(
4647 Base: Result.TDBase,
4648 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
4649 const auto *StVar =
4650 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
4651 CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
4652 /*IsInitializer=*/true);
4653 // Store reductions address.
4654 LValue RedLVal = CGF.EmitLValueForField(
4655 Base: Result.TDBase,
4656 Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
4657 if (Data.Reductions) {
4658 CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
4659 } else {
4660 CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
4661 Ty: CGF.getContext().VoidPtrTy);
4662 }
4663 enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
4664 llvm::SmallVector<llvm::Value *, 12> TaskArgs{
4665 UpLoc,
4666 ThreadID,
4667 Result.NewTask,
4668 IfVal,
4669 LBLVal.getPointer(CGF),
4670 UBLVal.getPointer(CGF),
4671 CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
4672 llvm::ConstantInt::getSigned(
4673 Ty: CGF.IntTy, V: 1), // Always 1 because the taskgroup is emitted by the compiler
4674 llvm::ConstantInt::getSigned(
4675 Ty: CGF.IntTy, V: Data.Schedule.getPointer()
4676 ? Data.Schedule.getInt() ? NumTasks : Grainsize
4677 : NoSchedule),
4678 Data.Schedule.getPointer()
4679 ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
4680 /*isSigned=*/false)
4681 : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
4682 if (Data.HasModifier)
4683 TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));
4684
4685 TaskArgs.push_back(Elt: Result.TaskDupFn
4686 ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4687 V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
4688 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
4689 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4690 M&: CGM.getModule(), FnID: Data.HasModifier
4691 ? OMPRTL___kmpc_taskloop_5
4692 : OMPRTL___kmpc_taskloop),
4693 args: TaskArgs);
4694}
4695
4696 /// Emit a reduction operation for each element of an array (required for
4697 /// array sections): LHS op = RHS.
4698/// \param Type Type of array.
4699/// \param LHSVar Variable on the left side of the reduction operation
4700/// (references element of array in original variable).
4701/// \param RHSVar Variable on the right side of the reduction operation
4702/// (references element of array in original variable).
4703/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4704/// RHSVar.
4705static void EmitOMPAggregateReduction(
4706 CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
4707 const VarDecl *RHSVar,
4708 const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
4709 const Expr *, const Expr *)> &RedOpGen,
4710 const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
4711 const Expr *UpExpr = nullptr) {
4712 // Perform element-by-element initialization.
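// Conceptually, the loop below emits (a sketch):
//   for (size_t i = 0; i != NumElements; ++i)
//     LHS[i] = RedOp(LHS[i], RHS[i]);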
4713 QualType ElementTy;
4714 Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
4715 Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);
4716
4717 // Drill down to the base element type on both arrays.
4718 const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
4719 llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);
4720
4721 llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
4722 llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
4723 // Compute the pointer past the last element.
4724 llvm::Value *LHSEnd =
4725 CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
4726 // The basic structure here is a while-do loop.
4727 llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
4728 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
4729 llvm::Value *IsEmpty =
4730 CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
4731 CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
4732
4733 // Enter the loop body, making that address the current address.
4734 llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
4735 CGF.EmitBlock(BB: BodyBB);
4736
4737 CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);
4738
4739 llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
4740 Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
4741 RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
4742 Address RHSElementCurrent(
4743 RHSElementPHI, RHSAddr.getElementType(),
4744 RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
4745
4746 llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
4747 Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
4748 LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
4749 Address LHSElementCurrent(
4750 LHSElementPHI, LHSAddr.getElementType(),
4751 LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
4752
4753 // Emit copy.
4754 CodeGenFunction::OMPPrivateScope Scope(CGF);
4755 Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
4756 Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
4757 Scope.Privatize();
4758 RedOpGen(CGF, XExpr, EExpr, UpExpr);
4759 Scope.ForceCleanup();
4760
4761 // Shift the address forward by one element.
4762 llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
4763 Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
4764 Name: "omp.arraycpy.dest.element");
4765 llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
4766 Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
4767 Name: "omp.arraycpy.src.element");
4768 // Check whether we've reached the end.
4769 llvm::Value *Done =
4770 CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
4771 CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
4772 LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
4773 RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());
4774
4775 // Done.
4776 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
4777}
4778
4779 /// Emit reduction combiner. If the combiner is a simple expression, emit it
4780 /// as is; otherwise treat it as the combiner of a UDR decl and emit it as a
4781 /// call to the UDR combiner function.
4782static void emitReductionCombiner(CodeGenFunction &CGF,
4783 const Expr *ReductionOp) {
4784 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4785 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4786 if (const auto *DRE =
4787 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4788 if (const auto *DRD =
4789 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4790 std::pair<llvm::Function *, llvm::Function *> Reduction =
4791 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4792 RValue Func = RValue::get(V: Reduction.first);
4793 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4794 CGF.EmitIgnoredExpr(E: ReductionOp);
4795 return;
4796 }
4797 CGF.EmitIgnoredExpr(E: ReductionOp);
4798}
4799
4800llvm::Function *CGOpenMPRuntime::emitReductionFunction(
4801 StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
4802 ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
4803 ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
4804 ASTContext &C = CGM.getContext();
4805
4806 // void reduction_func(void *LHSArg, void *RHSArg);
4807 FunctionArgList Args;
4808 ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4809 ImplicitParamKind::Other);
4810 ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
4811 ImplicitParamKind::Other);
4812 Args.push_back(&LHSArg);
4813 Args.push_back(&RHSArg);
4814 const auto &CGFI =
4815 CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
4816 std::string Name = getReductionFuncName(Name: ReducerName);
4817 auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
4818 llvm::GlobalValue::InternalLinkage, Name,
4819 &CGM.getModule());
4820 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
4821 Fn->setDoesNotRecurse();
4822 CodeGenFunction CGF(CGM);
4823 CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
4824
4825 // Dst = (void*[n])(LHSArg);
4826 // Src = (void*[n])(RHSArg);
4827 Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4828 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(&LHSArg)),
4829 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
4830 ArgsElemType, CGF.getPointerAlign());
4831 Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4832 V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(&RHSArg)),
4833 DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
4834 ArgsElemType, CGF.getPointerAlign());
4835
4836 // ...
4837 // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
4838 // ...
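// For instance, for 'reduction(+ : x)' with an int x, the generated helper
// is morally equivalent to (a sketch; the actual name comes from
// getReductionFuncName):
//   static void reduction_func(void *lhs[], void *rhs[]) {
//     *(int *)lhs[0] = *(int *)lhs[0] + *(int *)rhs[0];
//   }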
4839 CodeGenFunction::OMPPrivateScope Scope(CGF);
4840 const auto *IPriv = Privates.begin();
4841 unsigned Idx = 0;
4842 for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
4843 const auto *RHSVar =
4844 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
4845 Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
4846 const auto *LHSVar =
4847 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
4848 Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
4849 QualType PrivTy = (*IPriv)->getType();
4850 if (PrivTy->isVariablyModifiedType()) {
4851 // Get array size and emit VLA type.
4852 ++Idx;
4853 Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
4854 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
4855 const VariableArrayType *VLA =
4856 CGF.getContext().getAsVariableArrayType(T: PrivTy);
4857 const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
4858 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4859 CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
4860 CGF.EmitVariablyModifiedType(Ty: PrivTy);
4861 }
4862 }
4863 Scope.Privatize();
4864 IPriv = Privates.begin();
4865 const auto *ILHS = LHSExprs.begin();
4866 const auto *IRHS = RHSExprs.begin();
4867 for (const Expr *E : ReductionOps) {
4868 if ((*IPriv)->getType()->isArrayType()) {
4869 // Emit reduction for array section.
4870 const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
4871 const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
4872 EmitOMPAggregateReduction(
4873 CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
4874 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4875 emitReductionCombiner(CGF, ReductionOp: E);
4876 });
4877 } else {
4878 // Emit reduction for array subscript or single variable.
4879 emitReductionCombiner(CGF, ReductionOp: E);
4880 }
4881 ++IPriv;
4882 ++ILHS;
4883 ++IRHS;
4884 }
4885 Scope.ForceCleanup();
4886 CGF.FinishFunction();
4887 return Fn;
4888}
4889
4890void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4891 const Expr *ReductionOp,
4892 const Expr *PrivateRef,
4893 const DeclRefExpr *LHS,
4894 const DeclRefExpr *RHS) {
4895 if (PrivateRef->getType()->isArrayType()) {
4896 // Emit reduction for array section.
4897 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
4898 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
4899 EmitOMPAggregateReduction(
4900 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
4901 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4902 emitReductionCombiner(CGF, ReductionOp);
4903 });
4904 } else {
4905 // Emit reduction for array subscript or single variable.
4906 emitReductionCombiner(CGF, ReductionOp);
4907 }
4908}
4909
4910static std::string generateUniqueName(CodeGenModule &CGM,
4911 llvm::StringRef Prefix, const Expr *Ref);
4912
4913void CGOpenMPRuntime::emitPrivateReduction(
4914 CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
4915 const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
4916
4917 // Create a shared global variable (__shared_reduction_var) to accumulate
4918 // the final result.
4919 //
4920 // The master thread (thread_id == 0) initializes __shared_reduction_var
4921 // with the identity value or initializer.
4922 //
4923 // Call __kmpc_barrier to synchronize after initialization, before
4924 // combining.
4925 // For each i:
4926 // - Thread enters critical section.
4927 // - Reads its private value from Privates[i].
4928 // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
4929 // Privates[i]).
4930 // - Exits critical section.
4931 //
4932 // Call __kmpc_barrier after combining.
4933 //
4934 // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
4935 //
4936 // Final __kmpc_barrier to synchronize after broadcasting.
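// In pseudo-code, the emitted sequence is (a sketch):
//   if (gtid == 0) shared = <init>;
//   __kmpc_barrier(loc, gtid);
//   critical { shared = RedOp(shared, private); }
//   __kmpc_barrier(loc, gtid);
//   rhs = shared; // broadcast
//   __kmpc_barrier(loc, gtid);
//   critical { lhs = RedOp(lhs, rhs); } // combine with the original item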
4938 QualType PrivateType = Privates->getType();
4939 llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);
4940
4941 const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
4942 std::string ReductionVarNameStr;
4943 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
4944 ReductionVarNameStr =
4945 generateUniqueName(CGM, DRE->getDecl()->getNameAsString(), Privates);
4946 else
4947 ReductionVarNameStr = "unnamed_priv_var";
4948
4949 // Create an internal shared variable
4950 std::string SharedName =
4951 CGM.getOpenMPRuntime().getName(Parts: {"internal_private_", ReductionVarNameStr});
4952 llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
4953 Ty: LLVMType, Name: ".omp.reduction." + SharedName);
4954
4955 SharedVar->setAlignment(
4956 llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));
4957
4958 Address SharedResult =
4959 CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();
4960
4961 llvm::Value *ThreadId = getThreadID(CGF, Loc);
4962 llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
4963 llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
4964
4965 llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
4966 llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");
4967
4968 llvm::Value *IsMaster = CGF.Builder.CreateICmpEQ(
4969 LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
4970 CGF.Builder.CreateCondBr(Cond: IsMaster, True: InitBB, False: InitEndBB);
4971
4972 CGF.EmitBlock(BB: InitBB);
4973
4974 auto EmitSharedInit = [&]() {
4975 if (UDR) { // Check if it's a User-Defined Reduction
4976 if (const Expr *UDRInitExpr = UDR->getInitializer()) {
4977 std::pair<llvm::Function *, llvm::Function *> FnPair =
4978 getUserDefinedReduction(D: UDR);
4979 llvm::Function *InitializerFn = FnPair.second;
4980 if (InitializerFn) {
4981 if (const auto *CE =
4982 dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
4983 const auto *OutDRE = cast<DeclRefExpr>(
4984 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
4985 ->getSubExpr());
4986 const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());
4987
4988 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
4989 LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);
4990
4991 (void)LocalScope.Privatize();
4992 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
4993 Val: CE->getCallee()->IgnoreParenImpCasts())) {
4994 CodeGenFunction::OpaqueValueMapping OpaqueMap(
4995 CGF, OVE, RValue::get(V: InitializerFn));
4996 CGF.EmitIgnoredExpr(CE);
4997 } else {
4998 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
4999 Quals: PrivateType.getQualifiers(),
5000 /*IsInitializer=*/true);
5001 }
5002 } else {
5003 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
5004 Quals: PrivateType.getQualifiers(),
5005 /*IsInitializer=*/true);
5006 }
5007 } else {
5008 CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
5009 Quals: PrivateType.getQualifiers(),
5010 /*IsInitializer=*/true);
5011 }
5012 } else {
5013 // EmitNullInitialization handles default construction for C++ classes
5014 // and zeroing for scalars, which is a reasonable default.
5015 CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
5016 }
5017 return; // UDR initialization handled
5018 }
5019 if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
5020 if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
5021 if (const Expr *InitExpr = VD->getInit()) {
5022 CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
5023 Quals: PrivateType.getQualifiers(), IsInitializer: true);
5024 return;
5025 }
5026 }
5027 }
5028 CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
5029 };
5030 EmitSharedInit();
5031 CGF.Builder.CreateBr(Dest: InitEndBB);
5032 CGF.EmitBlock(BB: InitEndBB);
5033
5034 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5035 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5036 args: BarrierArgs);
5037
5038 const Expr *ReductionOp = ReductionOps;
5039 const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
5040 LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
5041 LValue LHSLV = CGF.EmitLValue(E: Privates);
5042
5043 auto EmitCriticalReduction = [&](auto ReductionGen) {
5044 std::string CriticalName = getName(Parts: {"reduction_critical"});
5045 emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
5046 };
5047
5048 if (CurrentUDR) {
5049 // Handle user-defined reduction.
5050 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5051 Action.Enter(CGF);
5052 std::pair<llvm::Function *, llvm::Function *> FnPair =
5053 getUserDefinedReduction(D: CurrentUDR);
5054 if (FnPair.first) {
5055 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
5056 const auto *OutDRE = cast<DeclRefExpr>(
5057 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
5058 ->getSubExpr());
5059 const auto *InDRE = cast<DeclRefExpr>(
5060 Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
5061 ->getSubExpr());
5062 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5063 LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
5064 Addr: SharedLV.getAddress());
5065 LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
5066 Addr: LHSLV.getAddress());
5067 (void)LocalScope.Privatize();
5068 emitReductionCombiner(CGF, ReductionOp);
5069 }
5070 }
5071 };
5072 EmitCriticalReduction(ReductionGen);
5073 } else {
5074 // Handle built-in reduction operations.
5075#ifndef NDEBUG
5076 const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
5077 if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(Val: ReductionClauseExpr))
5078 ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();
5079
5080 const Expr *AssignRHS = nullptr;
5081 if (const auto *BinOp = dyn_cast<BinaryOperator>(Val: ReductionClauseExpr)) {
5082 if (BinOp->getOpcode() == BO_Assign)
5083 AssignRHS = BinOp->getRHS();
5084 } else if (const auto *OpCall =
5085 dyn_cast<CXXOperatorCallExpr>(Val: ReductionClauseExpr)) {
5086 if (OpCall->getOperator() == OO_Equal)
5087 AssignRHS = OpCall->getArg(1);
5088 }
5089
5090 assert(AssignRHS &&
5091 "Private variable reduction: invalid ReductionOp expression");
5092#endif
5093
5094 auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
5095 Action.Enter(CGF);
5096 const auto *OmpOutDRE =
5097 dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
5098 const auto *OmpInDRE =
5099 dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
5100 assert(
5101 OmpOutDRE && OmpInDRE &&
5102 "Private variable reduction: LHSExpr/RHSExpr must be DeclRefExprs");
5103 const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
5104 const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
5105 CodeGenFunction::OMPPrivateScope LocalScope(CGF);
5106 LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
5107 LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
5108 (void)LocalScope.Privatize();
5109 // Emit the actual reduction operation
5110 CGF.EmitIgnoredExpr(E: ReductionOp);
5111 };
5112 EmitCriticalReduction(ReductionGen);
5113 }
5114
5115 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5116 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5117 args: BarrierArgs);
5118
5119 // Broadcast the final result.
5120 bool IsAggregate = PrivateType->isAggregateType();
5121 LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
5122 llvm::Value *FinalResultVal = nullptr;
5123 Address FinalResultAddr = Address::invalid();
5124
5125 if (IsAggregate)
5126 FinalResultAddr = SharedResult;
5127 else
5128 FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);
5129
5130 LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
5131 if (IsAggregate) {
5132 CGF.EmitAggregateCopy(Dest: TargetLHSLV,
5133 Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
5134 EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
5135 } else {
5136 CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
5137 }
5138 // Final synchronization barrier
5139 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
5140 M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
5141 args: BarrierArgs);
5142
5143 // Combine with the original list item.
5144 auto OriginalListCombiner = [&](CodeGenFunction &CGF,
5145 PrePostActionTy &Action) {
5146 Action.Enter(CGF);
5147 emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
5148 LHS: cast<DeclRefExpr>(Val: LHSExprs),
5149 RHS: cast<DeclRefExpr>(Val: RHSExprs));
5150 };
5151 EmitCriticalReduction(OriginalListCombiner);
5152}
5153
5154void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
5155 ArrayRef<const Expr *> OrgPrivates,
5156 ArrayRef<const Expr *> OrgLHSExprs,
5157 ArrayRef<const Expr *> OrgRHSExprs,
5158 ArrayRef<const Expr *> OrgReductionOps,
5159 ReductionOptionsTy Options) {
5160 if (!CGF.HaveInsertPoint())
5161 return;
5162
5163 bool WithNowait = Options.WithNowait;
5164 bool SimpleReduction = Options.SimpleReduction;
5165
5166 // The following code should be emitted for reduction:
5167 //
5168 // static kmp_critical_name lock = { 0 };
5169 //
5170 // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
5171 // *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
5172 // ...
5173 // *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
5174 // *(Type<n>-1*)rhs[<n>-1]);
5175 // }
5176 //
5177 // ...
5178 // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
5179 // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
5180 // RedList, reduce_func, &<lock>)) {
5181 // case 1:
5182 // ...
5183 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5184 // ...
5185 // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
5186 // break;
5187 // case 2:
5188 // ...
5189 // Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
5190 // ...
5191 // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
5192 // break;
5193 // default:;
5194 // }
5195 //
5196 // If SimpleReduction is true, only the following code is generated:
5197 // ...
5198 // <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
5199 // ...
5200
5201 ASTContext &C = CGM.getContext();
5202
5203 if (SimpleReduction) {
5204 CodeGenFunction::RunCleanupsScope Scope(CGF);
5205 const auto *IPriv = OrgPrivates.begin();
5206 const auto *ILHS = OrgLHSExprs.begin();
5207 const auto *IRHS = OrgRHSExprs.begin();
5208 for (const Expr *E : OrgReductionOps) {
5209 emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
5210 RHS: cast<DeclRefExpr>(Val: *IRHS));
5211 ++IPriv;
5212 ++ILHS;
5213 ++IRHS;
5214 }
5215 return;
5216 }
5217
5218 // Filter out shared reduction variables based on IsPrivateVarReduction flag.
5219 // Only keep entries where the corresponding variable is not private.
5220 SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
5221 FilteredRHSExprs, FilteredReductionOps;
5222 for (unsigned I : llvm::seq<unsigned>(
5223 Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
5224 if (!Options.IsPrivateVarReduction[I]) {
5225 FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
5226 FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
5227 FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
5228 FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
5229 }
5230 }
5231 // Wrap filtered vectors in ArrayRef for downstream shared reduction
5232 // processing.
5233 ArrayRef<const Expr *> Privates = FilteredPrivates;
5234 ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
5235 ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
5236 ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;
5237
5238 // 1. Build a list of reduction variables.
5239 // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
5240 auto Size = RHSExprs.size();
5241 for (const Expr *E : Privates) {
5242 if (E->getType()->isVariablyModifiedType())
5243 // Reserve a slot for the array size.
5244 ++Size;
5245 }
5246 llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
5247 QualType ReductionArrayTy = C.getConstantArrayType(
5248 EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
5249 /*IndexTypeQuals=*/0);
5250 RawAddress ReductionList =
5251 CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
5252 const auto *IPriv = Privates.begin();
5253 unsigned Idx = 0;
5254 for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
5255 Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
5256 CGF.Builder.CreateStore(
5257 Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
5258 V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
5259 Addr: Elem);
5260 if ((*IPriv)->getType()->isVariablyModifiedType()) {
5261 // Store array size.
5262 ++Idx;
5263 Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
5264 llvm::Value *Size = CGF.Builder.CreateIntCast(
5265 V: CGF.getVLASize(
5266 vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
5267 .NumElts,
5268 DestTy: CGF.SizeTy, /*isSigned=*/false);
5269 CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
5270 Addr: Elem);
5271 }
5272 }
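  // E.g. (illustrative) for the reduction items 'int a' and a VLA 'double
  // b[n]', RedList holds {&a, &b, (void *)n}: the extra slot carries the VLA
  // size for reduce_func.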

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);
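  // Per the libomp contract (assumed here): Res == 1 means this thread runs
  // the combining path, Res == 2 means it must use the atomic fallback, and
  // Res == 0 means it has nothing to combine.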

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  for (unsigned I : llvm::seq<unsigned>(
           std::min(OrgReductionOps.size(), OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, OrgPrivates[I], OrgLHSExprs[I],
                           OrgRHSExprs[I], OrgReductionOps[I]);
  }
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from a declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(CGF.Builder.getPtrTy(0)),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.Builder.getPtrTy(0)),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
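  // One such record is filled in per reduction item; e.g. (illustrative)
  //   #pragma omp taskgroup task_reduction(+ : x)
  // yields a single-element .rd_input. array describing 'x'.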
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It
    // is required because the runtime does not provide a way to pass the
    // sizes of VLAs/array sections to the initializer/combiner/finalizer
    // functions. Instead, threadprivate global variables are used to store
    // these values, which are then read inside those functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid,
    // int is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit the threadprivate global variable if the size of the reduction item
  // is non-constant (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}

void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}

namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

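  // Build call omp_allocator_handle_t __kmpc_init_allocator(int gtid,
  // omp_memspace_handle_t ms, int ntraits, omp_alloctrait_t traits[])
  // (signature assumed from libomp).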
  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  getNumTeamsExprForTargetDirective(CGF, D, Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      Attrs.MinThreads = std::max(Attrs.MinThreads, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(Attrs.MinTeams, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  cantFail(OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}

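// Illustrative example: for '#pragma omp target teams num_teams(8)' the
// num_teams expression is returned below and Min/MaxTeamsVal are both set
// to 8.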
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num teams ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}

/// Check for a num threads constant value (stored in \p UpperBound), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E and \p CondVal, respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If an if clause is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
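    // E.g. (illustrative) '#pragma omp parallel if(c) num_threads(4)' gives
    // c ? 4 : 1.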
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause = Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}

6542const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6543 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6544 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6545 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6546 "Clauses associated with the teams directive expected to be emitted "
6547 "only for the host!");
6548 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6549 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6550 "Expected target-based executable directive.");
6551
6552 const Expr *NT = nullptr;
6553 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6554
6555 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6556 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6557 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
6558 UpperBound = UpperBound ? Constant->getZExtValue()
6559 : std::min(a: UpperBound,
6560 b: int32_t(Constant->getZExtValue()));
6561 }
6562 // If we haven't found a upper bound, remember we saw a thread limiting
6563 // clause.
6564 if (UpperBound == -1)
6565 UpperBound = 0;
6566 if (EPtr)
6567 *EPtr = E;
6568 };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    // let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
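
// For illustration (a sketch under the constant-folding rules above, using
// placeholder clause values): for
//   #pragma omp target teams thread_limit(64)
// UpperBound becomes 64 and, when requested, *ThreadLimitExpr points at the
// clause expression, while for
//   #pragma omp target simd
// the region is treated as sequential on the device and UpperBound is forced
// to 1.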

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /*UpperBoundOnly=*/false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression has already been
    // handled above.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle the if clause. If it is present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If both the thread limit and the num threads expression were present,
  // take the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
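
// For illustration (a sketch, not normative; c, n, and t are placeholders):
// given
//   #pragma omp target parallel if(c) num_threads(n) thread_limit(t)
// the code above produces roughly
//   %nt = select i1 %c, i32 %n, i32 1
//   %v  = select i1 (icmp ult %t, %nt), i32 %t, i32 %nt
// i.e. the if clause forces one thread when false, and a dynamic thread_limit
// clamps the final value; with neither num_threads nor thread_limit the result
// is 0, which lets the runtime choose.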

namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
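
  // Sketch of what this computes (assuming the encoding in
  // llvm/Frontend/OpenMP, where OMP_MAP_MEMBER_OF occupies the high 16 bits
  // of the 64-bit flags): the loop counts the trailing zero bits of the mask,
  // so a member-of index I can be encoded as
  //   OpenMPOffloadMappingFlags(uint64_t(I) << getFlagMemberOffset());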

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
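
  // For illustration (a hypothetical example, not tied to any test): given
  //   struct T { int a; int b; int c; } t;
  //   #pragma omp target map(t.a, t.c)
  // the handler records LowestElem = {0, &t.a} and HighestElem = {2, &t.c},
  // so a single combined entry can cover the storage range [&t.a, &t.c].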

private:
  /// Information gathered for a component list of a map or motion clause,
  /// including whether a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
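
  // For illustration (hypothetical declarations): with `float *p` and
  // `int a[100]`, the cases above yield
  //   p[2:10] -> 10 * sizeof(float)   (explicit length)
  //   a[:]    -> sizeof(a)            (no length, no lower bound)
  //   a[5]    -> sizeof(int)          (single element, no colon)
  //   a[lb:]  -> max(sizeof(a) - lb * sizeof(int), 0), via the select above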

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library,
      // i.e. if we don't pass any bits, alloc/release is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
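
  // For illustration (hypothetical clause): map(always, close, tofrom: x)
  // with AddPtrFlag=false and AddIsTargetParamFlag=true comes out as
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE |
  //   OMP_MAP_TARGET_PARAM
  // while an implicitly captured variable additionally carries
  // OMP_MAP_IMPLICIT.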

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is a pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size greater than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
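
  // For illustration (hypothetical declarations): with `int a[8]` and a
  // runtime value n,
  //   a[2:1] -> false (constant length 1)
  //   a[2:n] -> true  (length not provably 1)
  //   a[2:]  -> true  (constant dimension size 8 != 1)
  //   a[2]   -> false (no colon: a single element)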

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = {},
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //     MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF; all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not a member of struct s, so it should
    // not be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    bool IsPartialMapped =
        !PartialStruct.PreliminaryMapData.BasePointers.empty();

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole struct.
    // In that case we need to skip adding an entry for the struct to the
    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
    // list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }
    for (; I != CE; ++I) {
      // If the current component is a member of a struct (parent struct),
      // mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as a complex pointer if this is not actually an
          // array-like expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer, or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as a final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do a bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be added
        // in the first position before any data internal to the struct is being
        // mapped.
        // Skip adding an entry in the CurInfo of this combined entry if the
        // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
        if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData || IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection && OASE) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection && OASE) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;
        if (!IsPartialMapped)
          IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran over the whole component list without encountering a member
    // expression, allocate space for the whole record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // To support strides in array sections, we initialize the first dimension
    // size as 1, the first offset as 0, and the first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect size information for each dimension and get the element size as
    // the first stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get the element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // on the next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get the dimension value except for the last dimension since we don't
      // need it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for an array section, so we insert a null
    // for anything other than an array section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
    // offset, count, and stride equal the number of non-contiguous
    // declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If the offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case like
        // arr[0:2][2], Clang constructs the inner dimension as an array
        // section, even though it is not actually in array section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //          Offset  Count  Stride
      //    D0      0       1       4    (int)    <- dummy dimension
      //    D1      0       2       8    (2 * (1) * 4)
      //    D2      1       2      20    (1 * (1 * 5) * 4)
      //    D3      0       2     200    (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A firstprivate variable captured by reference will use only the
    // 'private ptr' and 'map to' flags. Return the right flags if the
    // captured declaration is known as firstprivate in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }

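  /// Compute the flat list of the non-empty bases and fields of \a RD, in
  /// memory order, recursing into non-empty bases. If \a AsBase is true, only
  /// the base subobject of \a RD is considered.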
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Data))
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(cast<const FieldDecl *>(Data));
    }
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate to the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
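    // Component lists for each declaration are bucketed by kind; the buckets
    // are later walked in enum order, so 'present' entries are emitted first
    // and 'alloc' entries before the remaining ones.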
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.try_emplace(D, Total).first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and a zero size section. It is the user's
    // fault if that was not mapped before. If there is no map information and
    // the pointer is a struct member, then we defer the emission of that
    // entry until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
                    /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
                    IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and a zero
    // size section. It is the user's fault if that was not mapped before. If
    // there is no map information and the pointer is a struct member, then we
    // defer the emission of that entry until the whole struct has been
    // processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

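    // Walk the collected buckets and generate the actual map entries, one
    // declaration at a time.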
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ {},
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }

public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
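  /// For example, when `s.x` and `s.y` of a struct `s` are mapped, a single
  /// combined entry is emitted that covers the range from the lowest mapped
  /// element to one past the highest, and the member entries are tagged as
  /// MEMBER_OF it.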
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a
      // map-type of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element).
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // The map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove the TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect an executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(CurDir);
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
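  /// Each by-reference capture is emitted as a PTR_AND_OBJ member of the
  /// lambda object so that the device copy of the lambda ends up pointing at
  /// mapped storage.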
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }

  /// Set correct indices for lambda captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
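    // For each entry that carries the exact implicit lambda-capture flag
    // combination, find the entry that maps the enclosing lambda object
    // itself and rewrite the capture's MEMBER_OF bits to point at it.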
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set the correct member_of index for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // For map(to: lambda): skip it here; it is processed in
    // generateDefaultMapInfo.
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(isa<const OMPExecutableDirective *>(CurDir) &&
           "Expect an executable directive");
    const auto *CurExecDir = cast<const OMPExecutableDirective *>(CurDir);
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The expression is not correct if the mapping is implicit.
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
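    // Move component lists whose map type carries the 'present' modifier, or
    // whose map type is 'alloc', ahead of the rest; the comparator keeps the
    // relative order of all other entries stable.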
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // We found an overlap if, for at least one component list, we reached
        // the head of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlap.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through the other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI, and captured value \a CV.
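  /// For example, `this` is mapped tofrom with the size of the enclosing
  /// class, a non-pointer capture by copy becomes a LITERAL entry, and every
  /// default map entry is additionally flagged as a TARGET_PARAM.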
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target
    // parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add a flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if
// possible.
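// For instance, for `this->x[0:n]` this returns the declaration of the member
// `x`; for anything that is not an array section over a member access it
// returns nullptr.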
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  std::string FileName;
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  if (auto *DbgInfo = CGF.getDebugInfo())
    FileName = DbgInfo->remapDIPath(PLoc.getFilename());
  else
    FileName = PLoc.getFilename();
  return OMPBuilder.getOrCreateSrcLocStr(FileName, ExprName, PLoc.getLine(),
                                         PLoc.getColumn(), SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

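  // Lazily create (or reuse) the user-defined mapper function for entries
  // that carry one; the builder invokes this callback per map entry.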
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  cantFail(OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}

/// Check for an inner distribute directive.
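/// For example, for '#pragma omp target' enclosing '#pragma omp teams
/// distribute', this returns the nested distribute directive.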
9171static const OMPExecutableDirective *
9172getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
9173 const auto *CS = D.getInnermostCapturedStmt();
9174 const auto *Body =
9175 CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
9176 const Stmt *ChildStmt =
9177 CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9178
9179 if (const auto *NestedDir =
9180 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
9181 OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
9182 switch (D.getDirectiveKind()) {
9183 case OMPD_target:
9184 // For now, treat 'target' with nested 'teams loop' as if it's
9185 // distributed (target teams distribute).
9186 if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
9187 return NestedDir;
9188 if (DKind == OMPD_teams) {
9189 Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
9190 /*IgnoreCaptured=*/true);
9191 if (!Body)
9192 return nullptr;
9193 ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
9194 if (const auto *NND =
9195 dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
9196 DKind = NND->getDirectiveKind();
9197 if (isOpenMPDistributeDirective(DKind))
9198 return NND;
9199 }
9200 }
9201 return nullptr;
9202 case OMPD_target_teams:
9203 if (isOpenMPDistributeDirective(DKind))
9204 return NestedDir;
9205 return nullptr;
9206 case OMPD_target_parallel:
9207 case OMPD_target_simd:
9208 case OMPD_target_parallel_for:
9209 case OMPD_target_parallel_for_simd:
9210 return nullptr;
9211 case OMPD_target_teams_distribute:
9212 case OMPD_target_teams_distribute_simd:
9213 case OMPD_target_teams_distribute_parallel_for:
9214 case OMPD_target_teams_distribute_parallel_for_simd:
9215 case OMPD_parallel:
9216 case OMPD_for:
9217 case OMPD_parallel_for:
9218 case OMPD_parallel_master:
9219 case OMPD_parallel_sections:
9220 case OMPD_for_simd:
9221 case OMPD_parallel_for_simd:
9222 case OMPD_cancel:
9223 case OMPD_cancellation_point:
9224 case OMPD_ordered:
9225 case OMPD_threadprivate:
9226 case OMPD_allocate:
9227 case OMPD_task:
9228 case OMPD_simd:
9229 case OMPD_tile:
9230 case OMPD_unroll:
9231 case OMPD_sections:
9232 case OMPD_section:
9233 case OMPD_single:
9234 case OMPD_master:
9235 case OMPD_critical:
9236 case OMPD_taskyield:
9237 case OMPD_barrier:
9238 case OMPD_taskwait:
9239 case OMPD_taskgroup:
9240 case OMPD_atomic:
9241 case OMPD_flush:
9242 case OMPD_depobj:
9243 case OMPD_scan:
9244 case OMPD_teams:
9245 case OMPD_target_data:
9246 case OMPD_target_exit_data:
9247 case OMPD_target_enter_data:
9248 case OMPD_distribute:
9249 case OMPD_distribute_simd:
9250 case OMPD_distribute_parallel_for:
9251 case OMPD_distribute_parallel_for_simd:
9252 case OMPD_teams_distribute:
9253 case OMPD_teams_distribute_simd:
9254 case OMPD_teams_distribute_parallel_for:
9255 case OMPD_teams_distribute_parallel_for_simd:
9256 case OMPD_target_update:
9257 case OMPD_declare_simd:
9258 case OMPD_declare_variant:
9259 case OMPD_begin_declare_variant:
9260 case OMPD_end_declare_variant:
9261 case OMPD_declare_target:
9262 case OMPD_end_declare_target:
9263 case OMPD_declare_reduction:
9264 case OMPD_declare_mapper:
9265 case OMPD_taskloop:
9266 case OMPD_taskloop_simd:
9267 case OMPD_master_taskloop:
9268 case OMPD_master_taskloop_simd:
9269 case OMPD_parallel_master_taskloop:
9270 case OMPD_parallel_master_taskloop_simd:
9271 case OMPD_requires:
9272 case OMPD_metadirective:
9273 case OMPD_unknown:
9274 default:
9275 llvm_unreachable("Unexpected directive.");
9276 }
9277 }
9278
9279 return nullptr;
9280}
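
// Example (illustrative): for a nest such as
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute
// the scan above drills from 'target' through 'teams' and returns the inner
// 'distribute' directive, which emitTargetNumIterationsCall below uses to
// precompute the loop trip count for the runtime.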
9281
9282/// Emit the user-defined mapper function. The code generation follows the
9283/// pattern in the example below.
9284/// \code
9285/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
9286/// void *base, void *begin,
9287/// int64_t size, int64_t type,
9288/// void *name = nullptr) {
9289/// // Allocate space for an array section first or add a base/begin for
9290/// // pointer dereference.
9291/// if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
9292/// !maptype.IsDelete)
9293/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9294/// size*sizeof(Ty), clearToFromMember(type));
9295/// // Map members.
9296/// for (unsigned i = 0; i < size; i++) {
9297/// // For each component specified by this mapper:
9298/// for (auto c : begin[i]->all_components) {
9299/// if (c.hasMapper())
9300/// (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
9301/// c.arg_type, c.arg_name);
9302/// else
9303/// __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
9304/// c.arg_begin, c.arg_size, c.arg_type,
9305/// c.arg_name);
9306/// }
9307/// }
9308/// // Delete the array section.
9309/// if (size > 1 && maptype.IsDelete)
9310/// __tgt_push_mapper_component(rt_mapper_handle, base, begin,
9311/// size*sizeof(Ty), clearToFromMember(type));
9312/// }
9313/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);
9324
9325 CodeGenFunction MapperCGF(CGM);
9326 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(MapperVarDecl, PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(MapperCGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
9359
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(OMPBuilder.emitUserDefinedMapper(
      PrivatizeAndGenMapInfoCB, ElemTy, Name, CustomMapperCB));
  UDMMap.try_emplace(D, NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
9382
9383llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9384 const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
9390}
9391
9392llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9393 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9394 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9395 const OMPLoopDirective &D)>
9396 SizeEmitter) {
9397 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9398 const OMPExecutableDirective *TD = &D;
9399 // Get nested teams distribute kind directive, if any. For now, treat
9400 // 'target_teams_loop' as if it's really a target_teams_distribute.
9401 if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
9402 Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
9411}
9412
9413static void
9414emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9415 const OMPExecutableDirective &D,
9416 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9417 bool RequiresOuterTask, const CapturedStmt &CS,
9418 bool OffloadingMandatory, CodeGenFunction &CGF) {
9419 if (OffloadingMandatory) {
9420 CGF.Builder.CreateUnreachable();
9421 } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
9429}
9430
9431static llvm::Value *emitDeviceID(
9432 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9433 CodeGenFunction &CGF) {
9434 // Emit device ID if any.
9435 llvm::Value *DeviceID;
9436 if (Device.getPointer()) {
9437 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9438 Device.getInt() == OMPC_DEVICE_device_num) &&
9439 "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
9446 return DeviceID;
9447}
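
// Example (illustrative): for a directive such as
//   #pragma omp target device(device_num : Dev)
// the helper above emits 'Dev' and sign-extends it to i64; without a device
// clause, the OMP_DEVICEID_UNDEF sentinel tells the runtime to pick the
// default device.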
9448
9449static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9450 CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
9459 }
9460 return DynCGroupMem;
9461}
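
// Example (illustrative): the clause handled above is the LLVM extension
//   #pragma omp target teams ompx_dyn_cgroup_mem(NBytes)
// which requests NBytes of dynamic work-group memory for the launched
// kernel; absent the clause, the kernel gets zero extra bytes.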
9462static void genMapInfoForCaptures(
9463 MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9464 const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9465 llvm::OpenMPIRBuilder &OMPBuilder,
9466 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
9467 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9468
9469 llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
9470 auto RI = CS.getCapturedRecordDecl()->field_begin();
9471 auto *CV = CapturedVars.begin();
9472 for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
9473 CE = CS.capture_end();
9474 CI != CE; ++CI, ++RI, ++CV) {
9475 MappableExprsHandler::MapCombinedInfoTy CurInfo;
9476 MappableExprsHandler::StructRangeInfoTy PartialStruct;
9477
9478 // VLA sizes are passed to the outlined region by copy and do not have map
9479 // information associated.
9480 if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
9504 // Generate correct mapping for variables captured by reference in
9505 // lambdas.
9506 if (CI->capturesVariable())
9507 MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
9508 CurInfo, LambdaPointers);
9509 }
9510 // We expect to have at least an element of information for this capture.
9511 assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
9512 "Non-existing map pointer for capture!");
9513 assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
9514 CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
9515 CurInfo.BasePointers.size() == CurInfo.Types.size() &&
9516 CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
9517 "Inconsistent map information sizes!");
9518
9519 // If there is an entry in PartialStruct it means we have a struct with
9520 // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                  CI->capturesThis(), OMPBuilder, nullptr,
                                  /*NotTargetParams=*/false);
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambda captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
}

9536static void
9537genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
9538 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
9539 llvm::OpenMPIRBuilder &OMPBuilder,
9540 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
9541 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {
9542
9543 CodeGenModule &CGM = CGF.CGM;
  // Map any list items in a map clause that were not captured because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }
9556 }
9557}
9558
9559static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9560 const CapturedStmt &CS,
9561 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9562 llvm::OpenMPIRBuilder &OMPBuilder,
9563 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9564 // Get mappable expression information.
9565 MappableExprsHandler MEHandler(D, CGF);
9566 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9567
9568 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9569 MappedVarSet, CombinedInfo);
  genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
9571}
9572
9573template <typename ClauseTy>
9574static void
9575emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9576 const OMPExecutableDirective &D,
9577 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9578 const auto *C = D.getSingleClause<ClauseTy>();
9579 assert(!C->varlist_empty() &&
9580 "ompx_bare requires explicit num_teams and thread_limit");
9581 CodeGenFunction::RunCleanupsScope Scope(CGF);
9582 for (auto *E : C->varlist()) {
9583 llvm::Value *V = CGF.EmitScalarExpr(E);
    Values.push_back(
        CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
9586 }
9587}
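
// Example (illustrative): a bare kernel such as
//   #pragma omp target teams ompx_bare num_teams(NT) thread_limit(TL)
// must spell out both launch bounds explicitly; the helper above evaluates
// each list item of the given clause and truncates it to i32.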
9588
9589static void emitTargetCallKernelLaunch(
9590 CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9591 const OMPExecutableDirective &D,
9592 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
9593 const CapturedStmt &CS, bool OffloadingMandatory,
9594 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9595 llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
9596 llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
9597 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9598 const OMPLoopDirective &D)>
9599 SizeEmitter,
9600 CodeGenFunction &CGF, CodeGenModule &CGM) {
9601 llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();
9602
9603 // Fill up the arrays with all the captured variables.
9604 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
9605 CGOpenMPRuntime::TargetDataInfo Info;
9606 genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);
9607
9608 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
9609 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
9610
9611 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
9612 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
9613 CGF.VoidPtrTy, CGM.getPointerAlign());
9614 InputInfo.PointersArray =
9615 Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9616 InputInfo.SizesArray =
9617 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
9618 InputInfo.MappersArray =
9619 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
9620 MapTypesArray = Info.RTArgs.MapTypesArray;
9621 MapNamesArray = Info.RTArgs.MapNamesArray;
9622
9623 auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
9624 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9625 OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
9626 SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
9627 bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;
9628
9629 if (IsReverseOffloading) {
9630 // Reverse offloading is not supported, so just execute on the host.
9631 // FIXME: This fallback solution is incorrect since it ignores the
9632 // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
9633 // assert here and ensure SEMA emits an error.
9634 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9635 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9636 return;
9637 }
9638
9639 bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
9640 unsigned NumTargetItems = InputInfo.NumberOfTargetItems;
9641
9642 llvm::Value *BasePointersArray =
9643 InputInfo.BasePointersArray.emitRawPointer(CGF);
9644 llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
9645 llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
9646 llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);
9647
9648 auto &&EmitTargetCallFallbackCB =
9649 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9650 OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
9651 -> llvm::OpenMPIRBuilder::InsertPointTy {
9652 CGF.Builder.restoreIP(IP);
9653 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9654 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9655 return CGF.Builder.saveIP();
9656 };
9657
9658 bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
9659 SmallVector<llvm::Value *, 3> NumTeams;
9660 SmallVector<llvm::Value *, 3> NumThreads;
9661 if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             NumThreads);
    } else {
      NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
9669 }
9670
9671 llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
9673 llvm::Value *NumIterations =
9674 OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
9675 llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
9676 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
9677 CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());
9678
9679 llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
9680 BasePointersArray, PointersArray, SizesArray, MapTypesArray,
9681 nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);
9682
9683 llvm::OpenMPIRBuilder::TargetKernelArgs Args(
9684 NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
9685 DynCGGroupMem, HasNoWait);
9686
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(AfterIP);
9692 };
9693
9694 if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
9696 else
9697 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
9698}
9699
9700static void
9701emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9702 const OMPExecutableDirective &D,
9703 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9704 bool RequiresOuterTask, const CapturedStmt &CS,
9705 bool OffloadingMandatory, CodeGenFunction &CGF) {
9706
9707 // Notify that the host version must be executed.
9708 auto &&ElseGen =
9709 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9710 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9711 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9712 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9713 };
9714
9715 if (RequiresOuterTask) {
9716 CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
9718 } else {
9719 OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
9720 }
9721}
9722
9723void CGOpenMPRuntime::emitTargetCall(
9724 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9725 llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
9726 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9727 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9728 const OMPLoopDirective &D)>
9729 SizeEmitter) {
9730 if (!CGF.HaveInsertPoint())
9731 return;
9732
9733 const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
9734 CGM.getLangOpts().OpenMPOffloadMandatory;
9735
9736 assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");
9737
9738 const bool RequiresOuterTask =
9739 D.hasClausesOfKind<OMPDependClause>() ||
9740 D.hasClausesOfKind<OMPNowaitClause>() ||
9741 D.hasClausesOfKind<OMPInReductionClause>() ||
9742 (CGM.getLangOpts().OpenMP >= 51 &&
9743 needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
9744 D.hasClausesOfKind<OMPThreadLimitClause>());
9745 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
9746 const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
9747 auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
9748 PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
9750 };
9751 emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
9752
9753 CodeGenFunction::OMPTargetDataInfo InputInfo;
9754 llvm::Value *MapTypesArray = nullptr;
9755 llvm::Value *MapNamesArray = nullptr;
9756
9757 auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
9758 RequiresOuterTask, &CS, OffloadingMandatory, Device,
9759 OutlinedFnID, &InputInfo, &MapTypesArray,
9760 &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
9761 PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
9766 };
9767
9768 auto &&TargetElseGen =
9769 [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9770 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
9773 };
9774
  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // the host regardless of the conditional in the if clause if, e.g., the
  // user does not specify target triples.
9779 if (OutlinedFnID) {
9780 if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
9782 } else {
9783 RegionCodeGenTy ThenRCG(TargetThenGen);
9784 ThenRCG(CGF);
9785 }
9786 } else {
9787 RegionCodeGenTy ElseRCG(TargetElseGen);
9788 ElseRCG(CGF);
9789 }
9790}
9791
9792void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
9793 StringRef ParentName) {
9794 if (!S)
9795 return;
9796
9797 // Codegen OMP target directives that offload compute to the device.
9798 bool RequiresDeviceCodegen =
9799 isa<OMPExecutableDirective>(S) &&
9800 isOpenMPTargetExecutionDirective(
9801 cast<OMPExecutableDirective>(S)->getDirectiveKind());
9802
9803 if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);
9808
9809 // Is this a target region that should not be emitted as an entry point? If
9810 // so just signal we are done with this target region.
9811 if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
9812 return;
9813
9814 switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
9866 case OMPD_parallel:
9867 case OMPD_for:
9868 case OMPD_parallel_for:
9869 case OMPD_parallel_master:
9870 case OMPD_parallel_sections:
9871 case OMPD_for_simd:
9872 case OMPD_parallel_for_simd:
9873 case OMPD_cancel:
9874 case OMPD_cancellation_point:
9875 case OMPD_ordered:
9876 case OMPD_threadprivate:
9877 case OMPD_allocate:
9878 case OMPD_task:
9879 case OMPD_simd:
9880 case OMPD_tile:
9881 case OMPD_unroll:
9882 case OMPD_sections:
9883 case OMPD_section:
9884 case OMPD_single:
9885 case OMPD_master:
9886 case OMPD_critical:
9887 case OMPD_taskyield:
9888 case OMPD_barrier:
9889 case OMPD_taskwait:
9890 case OMPD_taskgroup:
9891 case OMPD_atomic:
9892 case OMPD_flush:
9893 case OMPD_depobj:
9894 case OMPD_scan:
9895 case OMPD_teams:
9896 case OMPD_target_data:
9897 case OMPD_target_exit_data:
9898 case OMPD_target_enter_data:
9899 case OMPD_distribute:
9900 case OMPD_distribute_simd:
9901 case OMPD_distribute_parallel_for:
9902 case OMPD_distribute_parallel_for_simd:
9903 case OMPD_teams_distribute:
9904 case OMPD_teams_distribute_simd:
9905 case OMPD_teams_distribute_parallel_for:
9906 case OMPD_teams_distribute_parallel_for_simd:
9907 case OMPD_target_update:
9908 case OMPD_declare_simd:
9909 case OMPD_declare_variant:
9910 case OMPD_begin_declare_variant:
9911 case OMPD_end_declare_variant:
9912 case OMPD_declare_target:
9913 case OMPD_end_declare_target:
9914 case OMPD_declare_reduction:
9915 case OMPD_declare_mapper:
9916 case OMPD_taskloop:
9917 case OMPD_taskloop_simd:
9918 case OMPD_master_taskloop:
9919 case OMPD_master_taskloop_simd:
9920 case OMPD_parallel_master_taskloop:
9921 case OMPD_parallel_master_taskloop_simd:
9922 case OMPD_requires:
9923 case OMPD_metadirective:
9924 case OMPD_unknown:
9925 default:
9926 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9927 }
9928 return;
9929 }
9930
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
9946}
9947
9948static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9949 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9950 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9951 if (!DevTy)
9952 return false;
9953 // Do not emit device_type(nohost) functions for the host.
9954 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9955 return true;
9956 // Do not emit device_type(host) functions for the device.
9957 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9958 return true;
9959 return false;
9960}
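
// Example (illustrative): this implements the device_type clause, e.g.
//   #pragma omp declare target to(foo) device_type(nohost)
// marks 'foo' as device-only, so it is skipped when emitting host code;
// device_type(host) conversely skips the declaration in device compilation.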
9961
9962bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
9963 // If emitting code for the host, we do not process FD here. Instead we do
9964 // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
9986}
9987
9988bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
9991 return true;
9992
9993 if (!CGM.getLangOpts().OpenMPIsTargetDevice)
9994 return false;
9995
9996 // Check if there are Ctors/Dtors in this declaration and look for target
9997 // regions in it. We use the complete variant to produce the kernel name
9998 // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
10000 if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
10001 for (const CXXConstructorDecl *Ctor : RD->ctors()) {
10002 StringRef ParentName =
10003 CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
10004 scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
10005 }
10006 if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
10007 StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
10010 }
10011 }
10012
  // Do not emit the variable if it is not marked as declare target.
10014 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10015 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
10016 cast<VarDecl>(GD.getDecl()));
10017 if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
10018 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10019 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10020 HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
10022 return true;
10023 }
10024 return false;
10025}
10026
10027void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
10028 llvm::Constant *Addr) {
10029 if (CGM.getLangOpts().OMPTargetTriples.empty() &&
10030 !CGM.getLangOpts().OpenMPIsTargetDevice)
10031 return;
10032
10033 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10034 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10035
10036 // If this is an 'extern' declaration we defer to the canonical definition and
10037 // do not emit an offloading entry.
10038 if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
10039 VD->hasExternalStorage())
10040 return;
10041
10042 if (!Res) {
10043 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10044 // Register non-target variables being emitted in device code (debug info
10045 // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
10048 }
10049 return;
10050 }
10051
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
10053 auto LinkageForVariable = [&VD, this]() {
10054 return CGM.getLLVMLinkageVarDefinition(VD);
10055 };
10056
10057 std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);
10069
10070 for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
10072}
10073
10074bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
10077 return emitTargetFunctions(GD);
10078
10079 return emitTargetGlobalVariable(GD);
10080}
10081
10082void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10083 for (const VarDecl *VD : DeferredGlobalVariables) {
10084 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10085 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10086 if (!Res)
10087 continue;
10088 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10089 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10090 !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
10092 } else {
10093 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10094 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10095 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10096 HasRequiresUnifiedSharedMemory)) &&
10097 "Expected link clause or to clause with unified memory.");
10098 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10099 }
10100 }
10101}
10102
10103void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
10104 CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
10107}
10108
10109void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10110 for (const OMPClause *Clause : D->clauselists()) {
10111 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10112 HasRequiresUnifiedSharedMemory = true;
10113 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
10116 switch (AC->getAtomicDefaultMemOrderKind()) {
10117 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10118 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10119 break;
10120 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10121 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10122 break;
10123 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10124 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10125 break;
10126 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10127 break;
10128 }
10129 }
10130 }
10131}
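
// Example (illustrative): this handles directives such as
//   #pragma omp requires unified_shared_memory
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// the former flips the USM flag consulted throughout target codegen; the
// latter selects the llvm::AtomicOrdering used for 'atomic' constructs that
// carry no explicit memory-order clause.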
10132
10133llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
10134 return RequiresAtomicOrdering;
10135}
10136
10137bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10138 LangAS &AS) {
10139 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10140 return false;
10141 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
10143 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10144 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10145 // Not supported, fallback to the default mem space.
10146 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10147 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10148 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10149 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10150 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10151 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10152 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10153 AS = LangAS::Default;
10154 return true;
10155 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10156 llvm_unreachable("Expected predefined allocator for the variables with the "
10157 "static storage.");
10158 }
10159 return false;
10160}
10161
10162bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
10163 return HasRequiresUnifiedSharedMemory;
10164}
10165
10166CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10167 CodeGenModule &CGM)
10168 : CGM(CGM) {
10169 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10170 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10171 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10172 }
10173}
10174
10175CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10176 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10177 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10178}
10179
10180bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
10181 if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
10182 return true;
10183
  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
10187 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
10188 if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
10191 return !F->isDeclaration();
10192 return false;
10193 }
10194 return true;
10195 }
10196
10197 return !AlreadyEmittedTargetDecls.insert(D).second;
10198}
10199
10200void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10201 const OMPExecutableDirective &D,
10202 SourceLocation Loc,
10203 llvm::Function *OutlinedFn,
10204 ArrayRef<llvm::Value *> CapturedVars) {
10205 if (!CGF.HaveInsertPoint())
10206 return;
10207
10208 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10209 CodeGenFunction::RunCleanupsScope Scope(CGF);
10210
10211 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      OutlinedFn};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
10223}
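
// Example (illustrative): for
//   #pragma omp teams num_teams(4) thread_limit(64)
// emitNumTeamsClause below typically runs first, pushing (4, 64) via
// __kmpc_push_num_teams, and emitTeamsCall above then forks the league with
// __kmpc_fork_teams.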
10224
10225void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10226 const Expr *NumTeams,
10227 const Expr *ThreadLimit,
10228 SourceLocation Loc) {
10229 if (!CGF.HaveInsertPoint())
10230 return;
10231
10232 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10233
  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
10252}
10253
10254void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10255 const Expr *ThreadLimit,
10256 SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
10270}
10271
10272void CGOpenMPRuntime::emitTargetDataCalls(
10273 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10274 const Expr *Device, const RegionCodeGenTy &CodeGen,
10275 CGOpenMPRuntime::TargetDataInfo &Info) {
10276 if (!CGF.HaveInsertPoint())
10277 return;
10278
10279 // Action used to replace the default codegen action and turn privatization
10280 // off.
10281 PrePostActionTy NoPrivAction;
10282
10283 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
10284
10285 llvm::Value *IfCondVal = nullptr;
10286 if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);
10288
10289 // Emit device ID if any.
10290 llvm::Value *DeviceID = nullptr;
10291 if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10296 }
10297
10298 // Fill up the arrays with all the mapped variables.
10299 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10300 auto GenMapInfoCB =
10301 [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
10303 // Get map clause information.
10304 MappableExprsHandler MEHandler(D, CGF);
10305 MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);
10306
10307 auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
10309 };
10310 if (CGM.getCodeGenOpts().getDebugInfo() !=
10311 llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
10315 }
10316
10317 return CombinedInfo;
10318 };
10319 using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
10320 auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
10322 switch (BodyGenType) {
10323 case BodyGenTy::Priv:
10324 if (!Info.CaptureDeviceAddrMap.empty())
10325 CodeGen(CGF);
10326 break;
10327 case BodyGenTy::DupNoPriv:
10328 if (!Info.CaptureDeviceAddrMap.empty()) {
10329 CodeGen.setAction(NoPrivAction);
10330 CodeGen(CGF);
10331 }
10332 break;
10333 case BodyGenTy::NoPriv:
10334 if (Info.CaptureDeviceAddrMap.empty()) {
10335 CodeGen.setAction(NoPrivAction);
10336 CodeGen(CGF);
10337 }
10338 break;
10339 }
10340 return InsertPointTy(CGF.Builder.GetInsertBlock(),
10341 CGF.Builder.GetInsertPoint());
10342 };
10343
10344 auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
10345 if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
10347 }
10348 };
10349
10350 auto CustomMapperCB = [&](unsigned int I) {
10351 llvm::Function *MFunc = nullptr;
10352 if (CombinedInfo.Mappers[I]) {
10353 Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
10356 }
10357 return MFunc;
10358 };
10359
10360 // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10362
10363 InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
10364 CGF.AllocaInsertPt->getIterator());
10365 InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
10366 CGF.Builder.GetInsertPoint());
10367 llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(OMPBuilder.createTargetData(
          OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, RTLoc));
  CGF.Builder.restoreIP(AfterIP);
10374}
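
// Example (illustrative): for
//   #pragma omp target data map(tofrom : A) use_device_ptr(P)
// the BodyGenTy phases above control how the region body is emitted: Priv
// emits it with use_device_ptr/use_device_addr privatization when device
// addresses were captured, while NoPriv and DupNoPriv emit it with
// privatization disabled.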
10375
10376void CGOpenMPRuntime::emitTargetDataStandAloneCall(
10377 CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
10378 const Expr *Device) {
10379 if (!CGF.HaveInsertPoint())
10380 return;
10381
10382 assert((isa<OMPTargetEnterDataDirective>(D) ||
10383 isa<OMPTargetExitDataDirective>(D) ||
10384 isa<OMPTargetUpdateDirective>(D)) &&
10385 "Expecting either target enter, exit data, or update directives.");
10386
10387 CodeGenFunction::OMPTargetDataInfo InputInfo;
10388 llvm::Value *MapTypesArray = nullptr;
10389 llvm::Value *MapNamesArray = nullptr;
10390 // Generate the code for the opening of the data environment.
10391 auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
10392 &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
10393 // Emit device ID if any.
10394 llvm::Value *DeviceID = nullptr;
10395 if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
10400 }
10401
10402 // Emit the number of elements in the offloading arrays.
10403 llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);
10405
10406 // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());
10408
10409 SmallVector<llvm::Value *, 13> OffloadingArgs(
10410 {RTLoc, DeviceID, PointerNum,
10411 InputInfo.BasePointersArray.emitRawPointer(CGF),
10412 InputInfo.PointersArray.emitRawPointer(CGF),
10413 InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
10414 InputInfo.MappersArray.emitRawPointer(CGF)});
10415
10416 // Select the right runtime function call for each standalone
10417 // directive.
10418 const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
10419 RuntimeFunction RTLFn;
10420 switch (D.getDirectiveKind()) {
10421 case OMPD_target_enter_data:
10422 RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
10423 : OMPRTL___tgt_target_data_begin_mapper;
10424 break;
10425 case OMPD_target_exit_data:
10426 RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
10427 : OMPRTL___tgt_target_data_end_mapper;
10428 break;
10429 case OMPD_target_update:
10430 RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
10431 : OMPRTL___tgt_target_data_update_mapper;
10432 break;
10433 case OMPD_parallel:
10434 case OMPD_for:
10435 case OMPD_parallel_for:
10436 case OMPD_parallel_master:
10437 case OMPD_parallel_sections:
10438 case OMPD_for_simd:
10439 case OMPD_parallel_for_simd:
10440 case OMPD_cancel:
10441 case OMPD_cancellation_point:
10442 case OMPD_ordered:
10443 case OMPD_threadprivate:
10444 case OMPD_allocate:
10445 case OMPD_task:
10446 case OMPD_simd:
10447 case OMPD_tile:
10448 case OMPD_unroll:
10449 case OMPD_sections:
10450 case OMPD_section:
10451 case OMPD_single:
10452 case OMPD_master:
10453 case OMPD_critical:
10454 case OMPD_taskyield:
10455 case OMPD_barrier:
10456 case OMPD_taskwait:
10457 case OMPD_taskgroup:
10458 case OMPD_atomic:
10459 case OMPD_flush:
10460 case OMPD_depobj:
10461 case OMPD_scan:
10462 case OMPD_teams:
10463 case OMPD_target_data:
10464 case OMPD_distribute:
10465 case OMPD_distribute_simd:
10466 case OMPD_distribute_parallel_for:
10467 case OMPD_distribute_parallel_for_simd:
10468 case OMPD_teams_distribute:
10469 case OMPD_teams_distribute_simd:
10470 case OMPD_teams_distribute_parallel_for:
10471 case OMPD_teams_distribute_parallel_for_simd:
10472 case OMPD_declare_simd:
10473 case OMPD_declare_variant:
10474 case OMPD_begin_declare_variant:
10475 case OMPD_end_declare_variant:
10476 case OMPD_declare_target:
10477 case OMPD_end_declare_target:
10478 case OMPD_declare_reduction:
10479 case OMPD_declare_mapper:
10480 case OMPD_taskloop:
10481 case OMPD_taskloop_simd:
10482 case OMPD_master_taskloop:
10483 case OMPD_master_taskloop_simd:
10484 case OMPD_parallel_master_taskloop:
10485 case OMPD_parallel_master_taskloop_simd:
10486 case OMPD_target:
10487 case OMPD_target_simd:
10488 case OMPD_target_teams_distribute:
10489 case OMPD_target_teams_distribute_simd:
10490 case OMPD_target_teams_distribute_parallel_for:
10491 case OMPD_target_teams_distribute_parallel_for_simd:
10492 case OMPD_target_teams:
10493 case OMPD_target_parallel:
10494 case OMPD_target_parallel_for:
10495 case OMPD_target_parallel_for_simd:
10496 case OMPD_requires:
10497 case OMPD_metadirective:
10498 case OMPD_unknown:
10499 default:
10500 llvm_unreachable("Unexpected standalone target data directive.");
10501 break;
10502 }
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
10512 };
10513
10514 auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
10515 &MapNamesArray](CodeGenFunction &CGF,
10516 PrePostActionTy &) {
10517 // Fill up the arrays with all the mapped variables.
10518 MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
10519 CGOpenMPRuntime::TargetDataInfo Info;
10520 MappableExprsHandler MEHandler(D, CGF);
10521 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
10522 emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
10523 /*IsNonContiguous=*/true, /*ForEndCall=*/false);
10524
10525 bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
10526 D.hasClausesOfKind<OMPNowaitClause>();
10527
10528 InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
10529 InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
10530 CGF.VoidPtrTy, CGM.getPointerAlign());
10531 InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
10532 CGM.getPointerAlign());
10533 InputInfo.SizesArray =
10534 Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
10535 InputInfo.MappersArray =
10536 Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
10537 MapTypesArray = Info.RTArgs.MapTypesArray;
10538 MapNamesArray = Info.RTArgs.MapNamesArray;
10539 if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
10541 else
10542 emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
10543 };
10544
10545 if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
10548 } else {
10549 RegionCodeGenTy ThenRCG(TargetThenGen);
10550 ThenRCG(CGF);
10551 }
10552}
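
// Example (illustrative): per the switch above, a directive such as
//   #pragma omp target enter data map(to : A) nowait
// lowers to a __tgt_target_data_begin_nowait_mapper call, while the same
// directive without 'nowait' calls __tgt_target_data_begin_mapper.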
10553
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
10557 Linear,
10558 LinearRef,
10559 LinearUVal,
10560 LinearVal,
10561 Uniform,
10562 Vector,
10563};
10564/// Attribute set of the parameter.
10565struct ParamAttrTy {
10566 ParamKindTy Kind = Vector;
10567 llvm::APSInt StrideOrArg;
10568 llvm::APSInt Alignment;
10569 bool HasVarStride = false;
10570};
10571} // namespace
10572
10573static unsigned evaluateCDTSize(const FunctionDecl *FD,
10574 ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
10594 QualType RetType = FD->getReturnType();
10595 if (RetType.isNull())
10596 return 0;
10597 ASTContext &C = FD->getASTContext();
10598 QualType CDT;
10599 if (!RetType.isNull() && !RetType->isVoidType()) {
10600 CDT = RetType;
10601 } else {
10602 unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
10606 ++Offset;
10607 }
10608 if (CDT.isNull()) {
10609 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10610 if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
10612 break;
10613 }
10614 }
10615 }
10616 }
10617 if (CDT.isNull())
10618 CDT = C.IntTy;
10619 CDT = CDT->getCanonicalTypeUnqualified();
10620 if (CDT->isRecordType() || CDT->isUnionType())
10621 CDT = C.IntTy;
  return C.getTypeSize(CDT);
10623}
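
// Example (illustrative): for a variant of a function returning 'double'
// (CDT = 64 bits) generated for AVX2 (256-bit vector registers) with no
// explicit simdlen, the formula above gives VLEN = 256 / 64 = 4.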
10624
10625/// Mangle the parameter part of the vector function name according to
10626/// their OpenMP classification. The mangling function is defined in
10627/// section 4.5 of the AAVFABI(2021Q1).
10628static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10629 SmallString<256> Buffer;
10630 llvm::raw_svector_ostream Out(Buffer);
10631 for (const auto &ParamAttr : ParamAttrs) {
10632 switch (ParamAttr.Kind) {
10633 case Linear:
10634 Out << 'l';
10635 break;
10636 case LinearRef:
10637 Out << 'R';
10638 break;
10639 case LinearUVal:
10640 Out << 'U';
10641 break;
10642 case LinearVal:
10643 Out << 'L';
10644 break;
10645 case Uniform:
10646 Out << 'u';
10647 break;
10648 case Vector:
10649 Out << 'v';
10650 break;
10651 }
10652 if (ParamAttr.HasVarStride)
10653 Out << "s" << ParamAttr.StrideOrArg;
10654 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10655 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10656 // Don't print the step value if it is not present or if it is
10657 // equal to 1.
10658 if (ParamAttr.StrideOrArg < 0)
10659 Out << 'n' << -ParamAttr.StrideOrArg;
10660 else if (ParamAttr.StrideOrArg != 1)
10661 Out << ParamAttr.StrideOrArg;
10662 }
10663
10664 if (!!ParamAttr.Alignment)
10665 Out << 'a' << ParamAttr.Alignment;
10666 }
10667
10668 return std::string(Out.str());
10669}
10670
10671static void
10672emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10673 const llvm::APSInt &VLENVal,
10674 ArrayRef<ParamAttrTy> ParamAttrs,
10675 OMPDeclareSimdDeclAttr::BranchStateTy State) {
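  // For example (illustrative): given simdlen(4), notinbranch, and a single
  // vector parameter, a function "foo" receives the attributes
  // "_ZGVbN4v_foo", "_ZGVcN4v_foo", "_ZGVdN4v_foo", and "_ZGVeN4v_foo",
  // one per ISA listed below.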
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
10694 llvm::SmallVector<char, 2> Masked;
10695 switch (State) {
10696 case OMPDeclareSimdDeclAttr::BS_Undefined:
10697 Masked.push_back(Elt: 'N');
10698 Masked.push_back(Elt: 'M');
10699 break;
10700 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10701 Masked.push_back(Elt: 'N');
10702 break;
10703 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10704 Masked.push_back(Elt: 'M');
10705 break;
10706 }
10707 for (char Mask : Masked) {
10708 for (const ISADataTy &Data : ISAData) {
10709 SmallString<256> Buffer;
10710 llvm::raw_svector_ostream Out(Buffer);
10711 Out << "_ZGV" << Data.ISA << Mask;
10712 if (!VLENVal) {
10713 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10714 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10715 Out << llvm::APSInt::getUnsigned(X: Data.VecRegSize / NumElts);
10716 } else {
10717 Out << VLENVal;
10718 }
10719 Out << mangleVectorParameters(ParamAttrs);
10720 Out << '_' << Fn->getName();
10721 Fn->addFnAttr(Kind: Out.str());
10722 }
10723 }
10724}
10725
// These are the functions needed to mangle the names of the vector functions
// generated by the compiler, according to the rules defined in the "Vector
// Function ABI specifications for AArch64", available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10731
10732/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
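/// For example (illustrative): a parameter marked uniform never maps to a
/// vector, while the same parameter with no clause (classified Vector) does.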
10733static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10734 QT = QT.getCanonicalType();
10735
10736 if (QT->isVoidType())
10737 return false;
10738
10739 if (Kind == ParamKindTy::Uniform)
10740 return false;
10741
10742 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10743 return false;
10744
10745 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10746 !QT->isReferenceType())
10747 return false;
10748
10749 return true;
10750}
10751
10752/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
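/// For example (illustrative): int, double, and pointers of the listed widths
/// are PBV, while a struct passed by value is not, since only floating,
/// integer, and pointer types qualify.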
10753static bool getAArch64PBV(QualType QT, ASTContext &C) {
10754 QT = QT.getCanonicalType();
10755 unsigned Size = C.getTypeSize(T: QT);
10756
  // Only scalar and complex types at most 16 bytes wide set PBV to true.
10758 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10759 return false;
10760
10761 if (QT->isFloatingType())
10762 return true;
10763
10764 if (QT->isIntegerType())
10765 return true;
10766
10767 if (QT->isPointerType())
10768 return true;
10769
10770 // TODO: Add support for complex types (section 3.1.2, item 2).
10771
10772 return false;
10773}
10774
10775/// Computes the lane size (LS) of a return type or of an input parameter,
10776/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
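/// For example (illustrative): a uniform "double *" parameter has LS 64 (the
/// size of the pointee), a vector-classified "double" parameter also has
/// LS 64 (its own size), and anything else falls back to the size of
/// uintptr_t.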
10777/// TODO: Add support for references, section 3.2.1, item 1.
10778static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10779 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10780 QualType PTy = QT.getCanonicalType()->getPointeeType();
10781 if (getAArch64PBV(QT: PTy, C))
10782 return C.getTypeSize(T: PTy);
10783 }
10784 if (getAArch64PBV(QT, C))
10785 return C.getTypeSize(T: QT);
10786
10787 return C.getTypeSize(T: C.getUIntPtrType());
10788}
10789
10790// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10791// signature of the scalar function, as defined in 3.2.2 of the
10792// AAVFABI.
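// For example (illustrative): for "double f(float x)" with x classified as
// Vector, the lane sizes are {64, 32}, so NDS = 32 and WDS = 64; the return
// value does not become an input because double is PBV.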
10793static std::tuple<unsigned, unsigned, bool>
10794getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10795 QualType RetType = FD->getReturnType().getCanonicalType();
10796
10797 ASTContext &C = FD->getASTContext();
10798
10799 bool OutputBecomesInput = false;
10800
10801 llvm::SmallVector<unsigned, 8> Sizes;
10802 if (!RetType->isVoidType()) {
10803 Sizes.push_back(Elt: getAArch64LS(QT: RetType, Kind: ParamKindTy::Vector, C));
10804 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
10805 OutputBecomesInput = true;
10806 }
10807 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10808 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
10809 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
10810 }
10811
10812 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10813 // The LS of a function parameter / return value can only be a power
10814 // of 2, starting from 8 bits, up to 128.
10815 assert(llvm::all_of(Sizes,
10816 [](unsigned Size) {
10817 return Size == 8 || Size == 16 || Size == 32 ||
10818 Size == 64 || Size == 128;
10819 }) &&
10820 "Invalid size");
10821
10822 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
10823 args&: OutputBecomesInput);
10824}
10825
10826// Function used to add the attribute. The parameter `VLEN` is
10827// templated to allow the use of "x" when targeting scalable functions
10828// for SVE.
10829template <typename T>
10830static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10831 char ISA, StringRef ParSeq,
10832 StringRef MangledName, bool OutputBecomesInput,
10833 llvm::Function *Fn) {
10834 SmallString<256> Buffer;
10835 llvm::raw_svector_ostream Out(Buffer);
10836 Out << Prefix << ISA << LMask << VLEN;
10837 if (OutputBecomesInput)
10838 Out << "v";
10839 Out << ParSeq << "_" << MangledName;
10840 Fn->addFnAttr(Kind: Out.str());
10841}
10842
10843// Helper function to generate the Advanced SIMD names depending on
10844// the value of the NDS when simdlen is not present.
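// For example (illustrative): NDS == 32 emits both a 2-lane (64-bit) and a
// 4-lane (128-bit) Advanced SIMD variant, matching the case below.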
10845static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10846 StringRef Prefix, char ISA,
10847 StringRef ParSeq, StringRef MangledName,
10848 bool OutputBecomesInput,
10849 llvm::Function *Fn) {
10850 switch (NDS) {
10851 case 8:
10852 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10853 OutputBecomesInput, Fn);
10854 addAArch64VectorName(VLEN: 16, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10855 OutputBecomesInput, Fn);
10856 break;
10857 case 16:
10858 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10859 OutputBecomesInput, Fn);
10860 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10861 OutputBecomesInput, Fn);
10862 break;
10863 case 32:
10864 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10865 OutputBecomesInput, Fn);
10866 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10867 OutputBecomesInput, Fn);
10868 break;
10869 case 64:
10870 case 128:
10871 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10872 OutputBecomesInput, Fn);
10873 break;
10874 default:
10875 llvm_unreachable("Scalar type is too wide.");
10876 }
10877}
10878
10879/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
10880static void emitAArch64DeclareSimdFunction(
10881 CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
10882 ArrayRef<ParamAttrTy> ParamAttrs,
10883 OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
10884 char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {
10885
10886 // Get basic data for building the vector signature.
10887 const auto Data = getNDSWDS(FD, ParamAttrs);
10888 const unsigned NDS = std::get<0>(t: Data);
10889 const unsigned WDS = std::get<1>(t: Data);
10890 const bool OutputBecomesInput = std::get<2>(t: Data);
10891
  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
10894 if (UserVLEN == 1) {
10895 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10896 L: DiagnosticsEngine::Warning,
10897 FormatString: "The clause simdlen(1) has no effect when targeting aarch64.");
10898 CGM.getDiags().Report(Loc: SLoc, DiagID);
10899 return;
10900 }
10901
10902 // 2. Section 3.3.1, item 1: user input must be a power of 2 for
10903 // Advanced SIMD output.
10904 if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
10905 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10906 L: DiagnosticsEngine::Warning, FormatString: "The value specified in simdlen must be a "
10907 "power of 2 when targeting Advanced SIMD.");
10908 CGM.getDiags().Report(Loc: SLoc, DiagID);
10909 return;
10910 }
10911
  // 3. Section 3.4.1: the SVE fixed vector length must obey the
  // architectural limits.
10914 if (ISA == 's' && UserVLEN != 0) {
10915 if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
10916 unsigned DiagID = CGM.getDiags().getCustomDiagID(
10917 L: DiagnosticsEngine::Warning, FormatString: "The clause simdlen must fit the %0-bit "
10918 "lanes in the architectural constraints "
10919 "for SVE (min is 128-bit, max is "
10920 "2048-bit, by steps of 128-bit)");
10921 CGM.getDiags().Report(Loc: SLoc, DiagID) << WDS;
10922 return;
10923 }
10924 }
10925
10926 // Sort out parameter sequence.
10927 const std::string ParSeq = mangleVectorParameters(ParamAttrs);
10928 StringRef Prefix = "_ZGV";
10929 // Generate simdlen from user input (if any).
10930 if (UserVLEN) {
10931 if (ISA == 's') {
10932 // SVE generates only a masked function.
10933 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
10934 OutputBecomesInput, Fn);
10935 } else {
10936 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10937 // Advanced SIMD generates one or two functions, depending on
10938 // the `[not]inbranch` clause.
10939 switch (State) {
10940 case OMPDeclareSimdDeclAttr::BS_Undefined:
10941 addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
10942 OutputBecomesInput, Fn);
10943 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
10944 OutputBecomesInput, Fn);
10945 break;
10946 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10947 addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
10948 OutputBecomesInput, Fn);
10949 break;
10950 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10951 addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
10952 OutputBecomesInput, Fn);
10953 break;
10954 }
10955 }
10956 } else {
10957 // If no user simdlen is provided, follow the AAVFABI rules for
10958 // generating the vector length.
10959 if (ISA == 's') {
10960 // SVE, section 3.4.1, item 1.
10961 addAArch64VectorName(VLEN: "x", LMask: "M", Prefix, ISA, ParSeq, MangledName,
10962 OutputBecomesInput, Fn);
10963 } else {
10964 assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
10965 // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
10966 // two vector names depending on the use of the clause
10967 // `[not]inbranch`.
10968 switch (State) {
10969 case OMPDeclareSimdDeclAttr::BS_Undefined:
10970 addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
10971 OutputBecomesInput, Fn);
10972 addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
10973 OutputBecomesInput, Fn);
10974 break;
10975 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10976 addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
10977 OutputBecomesInput, Fn);
10978 break;
10979 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10980 addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
10981 OutputBecomesInput, Fn);
10982 break;
10983 }
10984 }
10985 }
10986}
10987
10988void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
10989 llvm::Function *Fn) {
10990 ASTContext &C = CGM.getContext();
10991 FD = FD->getMostRecentDecl();
10992 while (FD) {
10993 // Map params to their positions in function decl.
10994 llvm::DenseMap<const Decl *, unsigned> ParamPositions;
10995 if (isa<CXXMethodDecl>(Val: FD))
10996 ParamPositions.try_emplace(FD, 0);
10997 unsigned ParamPos = ParamPositions.size();
10998 for (const ParmVarDecl *P : FD->parameters()) {
10999 ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
11000 ++ParamPos;
11001 }
11002 for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
11003 llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
11004 // Mark uniform parameters.
11005 for (const Expr *E : Attr->uniforms()) {
11006 E = E->IgnoreParenImpCasts();
11007 unsigned Pos;
11008 if (isa<CXXThisExpr>(E)) {
11009 Pos = ParamPositions[FD];
11010 } else {
11011 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11012 ->getCanonicalDecl();
11013 auto It = ParamPositions.find(PVD);
11014 assert(It != ParamPositions.end() && "Function parameter not found");
11015 Pos = It->second;
11016 }
11017 ParamAttrs[Pos].Kind = Uniform;
11018 }
11019 // Get alignment info.
11020 auto *NI = Attr->alignments_begin();
11021 for (const Expr *E : Attr->aligneds()) {
11022 E = E->IgnoreParenImpCasts();
11023 unsigned Pos;
11024 QualType ParmTy;
11025 if (isa<CXXThisExpr>(E)) {
11026 Pos = ParamPositions[FD];
11027 ParmTy = E->getType();
11028 } else {
11029 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11030 ->getCanonicalDecl();
11031 auto It = ParamPositions.find(PVD);
11032 assert(It != ParamPositions.end() && "Function parameter not found");
11033 Pos = It->second;
11034 ParmTy = PVD->getType();
11035 }
11036 ParamAttrs[Pos].Alignment =
11037 (*NI)
11038 ? (*NI)->EvaluateKnownConstInt(C)
11039 : llvm::APSInt::getUnsigned(
11040 C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
11041 .getQuantity());
11042 ++NI;
11043 }
11044 // Mark linear parameters.
11045 auto *SI = Attr->steps_begin();
11046 auto *MI = Attr->modifiers_begin();
11047 for (const Expr *E : Attr->linears()) {
11048 E = E->IgnoreParenImpCasts();
11049 unsigned Pos;
11050 bool IsReferenceType = false;
11051 // Rescaling factor needed to compute the linear parameter
11052 // value in the mangled name.
11053 unsigned PtrRescalingFactor = 1;
11054 if (isa<CXXThisExpr>(E)) {
11055 Pos = ParamPositions[FD];
11056 auto *P = cast<PointerType>(E->getType());
11057 PtrRescalingFactor = CGM.getContext()
11058 .getTypeSizeInChars(P->getPointeeType())
11059 .getQuantity();
11060 } else {
11061 const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
11062 ->getCanonicalDecl();
11063 auto It = ParamPositions.find(PVD);
11064 assert(It != ParamPositions.end() && "Function parameter not found");
11065 Pos = It->second;
11066 if (auto *P = dyn_cast<PointerType>(PVD->getType()))
11067 PtrRescalingFactor = CGM.getContext()
11068 .getTypeSizeInChars(P->getPointeeType())
11069 .getQuantity();
11070 else if (PVD->getType()->isReferenceType()) {
11071 IsReferenceType = true;
11072 PtrRescalingFactor =
11073 CGM.getContext()
11074 .getTypeSizeInChars(PVD->getType().getNonReferenceType())
11075 .getQuantity();
11076 }
11077 }
11078 ParamAttrTy &ParamAttr = ParamAttrs[Pos];
11079 if (*MI == OMPC_LINEAR_ref)
11080 ParamAttr.Kind = LinearRef;
11081 else if (*MI == OMPC_LINEAR_uval)
11082 ParamAttr.Kind = LinearUVal;
11083 else if (IsReferenceType)
11084 ParamAttr.Kind = LinearVal;
11085 else
11086 ParamAttr.Kind = Linear;
11087 // Assuming a stride of 1, for `linear` without modifiers.
11088 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
11089 if (*SI) {
11090 Expr::EvalResult Result;
11091 if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
11092 if (const auto *DRE =
11093 cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
11094 if (const auto *StridePVD =
11095 dyn_cast<ParmVarDecl>(DRE->getDecl())) {
11096 ParamAttr.HasVarStride = true;
11097 auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
11098 assert(It != ParamPositions.end() &&
11099 "Function parameter not found");
11100 ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
11101 }
11102 }
11103 } else {
11104 ParamAttr.StrideOrArg = Result.Val.getInt();
11105 }
11106 }
11107 // If we are using a linear clause on a pointer, we need to
11108 // rescale the value of linear_step with the byte size of the
11109 // pointee type.
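        // For example (illustrative): linear(p:2) on a "double *p" parameter
        // yields a mangled step of 2 * sizeof(double) = 16.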
11110 if (!ParamAttr.HasVarStride &&
11111 (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
11112 ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
11113 ++SI;
11114 ++MI;
11115 }
11116 llvm::APSInt VLENVal;
11117 SourceLocation ExprLoc;
11118 const Expr *VLENExpr = Attr->getSimdlen();
11119 if (VLENExpr) {
11120 VLENVal = VLENExpr->EvaluateKnownConstInt(C);
11121 ExprLoc = VLENExpr->getExprLoc();
11122 }
11123 OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
11124 if (CGM.getTriple().isX86()) {
11125 emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
11126 } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
11127 unsigned VLEN = VLENVal.getExtValue();
11128 StringRef MangledName = Fn->getName();
11129 if (CGM.getTarget().hasFeature("sve"))
11130 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11131 MangledName, 's', 128, Fn, ExprLoc);
11132 else if (CGM.getTarget().hasFeature("neon"))
11133 emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
11134 MangledName, 'n', 128, Fn, ExprLoc);
11135 }
11136 }
11137 FD = FD->getPreviousDecl();
11138 }
11139}
11140
11141namespace {
11142/// Cleanup action for doacross support.
11143class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
11144public:
11145 static const int DoacrossFinArgs = 2;
11146
11147private:
11148 llvm::FunctionCallee RTLFn;
11149 llvm::Value *Args[DoacrossFinArgs];
11150
11151public:
11152 DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
11153 ArrayRef<llvm::Value *> CallArgs)
11154 : RTLFn(RTLFn) {
11155 assert(CallArgs.size() == DoacrossFinArgs);
11156 std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
11157 }
11158 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11159 if (!CGF.HaveInsertPoint())
11160 return;
11161 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11162 }
11163};
11164} // namespace
11165
11166void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
11167 const OMPLoopDirective &D,
11168 ArrayRef<Expr *> NumIterations) {
11169 if (!CGF.HaveInsertPoint())
11170 return;
11171
11172 ASTContext &C = CGM.getContext();
11173 QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
11174 RecordDecl *RD;
11175 if (KmpDimTy.isNull()) {
11176 // Build struct kmp_dim { // loop bounds info casted to kmp_int64
11177 // kmp_int64 lo; // lower
11178 // kmp_int64 up; // upper
11179 // kmp_int64 st; // stride
11180 // };
11181 RD = C.buildImplicitRecord(Name: "kmp_dim");
11182 RD->startDefinition();
11183 addFieldToRecordDecl(C, RD, Int64Ty);
11184 addFieldToRecordDecl(C, RD, Int64Ty);
11185 addFieldToRecordDecl(C, RD, Int64Ty);
11186 RD->completeDefinition();
11187 KmpDimTy = C.getRecordType(RD);
11188 } else {
11189 RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
11190 }
11191 llvm::APInt Size(/*numBits=*/32, NumIterations.size());
11192 QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
11193 ArraySizeModifier::Normal, 0);
11194
11195 Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
11196 CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
11197 enum { LowerFD = 0, UpperFD, StrideFD };
11198 // Fill dims with data.
11199 for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
11200 LValue DimsLVal = CGF.MakeAddrLValue(
11201 CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
11202 // dims.upper = num_iterations;
11203 LValue UpperLVal = CGF.EmitLValueForField(
11204 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
11205 llvm::Value *NumIterVal = CGF.EmitScalarConversion(
11206 Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
11207 DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
11208 CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
11209 // dims.stride = 1;
11210 LValue StrideLVal = CGF.EmitLValueForField(
11211 Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
11212 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
11213 lvalue: StrideLVal);
11214 }
11215
11216 // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
11217 // kmp_int32 num_dims, struct kmp_dim * dims);
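  // For example (illustrative): "#pragma omp for ordered(2)" reaches here
  // with two dimensions, producing __kmpc_doacross_init(loc, gtid, 2, dims),
  // with the matching __kmpc_doacross_fini pushed as a cleanup below.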
11218 llvm::Value *Args[] = {
11219 emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
11220 getThreadID(CGF, Loc: D.getBeginLoc()),
11221 llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
11222 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11223 V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
11224 DestTy: CGM.VoidPtrTy)};
11225
11226 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11227 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
11228 CGF.EmitRuntimeCall(RTLFn, Args);
11229 llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
11230 emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
11231 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11232 M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
11233 CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
11234 A: llvm::ArrayRef(FiniArgs));
11235}
11236
11237template <typename T>
11238static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
11239 const T *C, llvm::Value *ULoc,
11240 llvm::Value *ThreadID) {
11241 QualType Int64Ty =
11242 CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
11243 llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
11244 QualType ArrayTy = CGM.getContext().getConstantArrayType(
11245 EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
11246 Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
11247 for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
11248 const Expr *CounterVal = C->getLoopData(I);
11249 assert(CounterVal);
11250 llvm::Value *CntVal = CGF.EmitScalarConversion(
11251 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
11252 Loc: CounterVal->getExprLoc());
11253 CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
11254 /*Volatile=*/false, Ty: Int64Ty);
11255 }
11256 llvm::Value *Args[] = {
11257 ULoc, ThreadID,
11258 CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
11259 llvm::FunctionCallee RTLFn;
11260 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
11261 OMPDoacrossKind<T> ODK;
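  // For example (illustrative): "depend(source)" selects
  // __kmpc_doacross_post with the current iteration vector, while
  // "depend(sink : i - 1)" selects __kmpc_doacross_wait.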
11262 if (ODK.isSource(C)) {
11263 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
11264 FnID: OMPRTL___kmpc_doacross_post);
11265 } else {
11266 assert(ODK.isSink(C) && "Expect sink modifier.");
11267 RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
11268 FnID: OMPRTL___kmpc_doacross_wait);
11269 }
11270 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11271}
11272
11273void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11274 const OMPDependClause *C) {
11275 return EmitDoacrossOrdered<OMPDependClause>(
11276 CGF, CGM, C, emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11277 getThreadID(CGF, Loc: C->getBeginLoc()));
11278}
11279
11280void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11281 const OMPDoacrossClause *C) {
11282 return EmitDoacrossOrdered<OMPDoacrossClause>(
11283 CGF, CGM, C, emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11284 getThreadID(CGF, Loc: C->getBeginLoc()));
11285}
11286
11287void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11288 llvm::FunctionCallee Callee,
11289 ArrayRef<llvm::Value *> Args) const {
11290 assert(Loc.isValid() && "Outlined function call location must be valid.");
11291 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
11292
11293 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
11294 if (Fn->doesNotThrow()) {
11295 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
11296 return;
11297 }
11298 }
11299 CGF.EmitRuntimeCall(callee: Callee, args: Args);
11300}
11301
11302void CGOpenMPRuntime::emitOutlinedFunctionCall(
11303 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11304 ArrayRef<llvm::Value *> Args) const {
11305 emitCall(CGF, Loc, Callee: OutlinedFn, Args);
11306}
11307
11308void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11309 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
11310 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
11311 HasEmittedDeclareTargetRegion = true;
11312}
11313
11314Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11315 const VarDecl *NativeParam,
11316 const VarDecl *TargetParam) const {
11317 return CGF.GetAddrOfLocalVar(VD: NativeParam);
11318}
11319
11320/// Return allocator value from expression, or return a null allocator (default
11321/// when no allocator specified).
11322static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11323 const Expr *Allocator) {
11324 llvm::Value *AllocVal;
11325 if (Allocator) {
11326 AllocVal = CGF.EmitScalarExpr(E: Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
11329 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
11330 DstTy: CGF.getContext().VoidPtrTy,
11331 Loc: Allocator->getExprLoc());
11332 } else {
11333 // If no allocator specified, it defaults to the null allocator.
11334 AllocVal = llvm::Constant::getNullValue(
11335 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
11336 }
11337 return AllocVal;
11338}
11339
11340/// Return the alignment from an allocate directive if present.
11341static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11342 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11343
11344 if (!AllocateAlignment)
11345 return nullptr;
11346
11347 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
11348}
11349
11350Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
11351 const VarDecl *VD) {
11352 if (!VD)
11353 return Address::invalid();
11354 Address UntiedAddr = Address::invalid();
11355 Address UntiedRealAddr = Address::invalid();
11356 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
11357 if (It != FunctionToUntiedTaskStackMap.end()) {
11358 const UntiedLocalVarsAddressesMap &UntiedData =
11359 UntiedLocalVarsStack[It->second];
11360 auto I = UntiedData.find(Key: VD);
11361 if (I != UntiedData.end()) {
11362 UntiedAddr = I->second.first;
11363 UntiedRealAddr = I->second.second;
11364 }
11365 }
11366 const VarDecl *CVD = VD->getCanonicalDecl();
11367 if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
11368 // Use the default allocation.
11369 if (!isAllocatableDecl(VD))
11370 return UntiedAddr;
11371 llvm::Value *Size;
11372 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
11373 if (CVD->getType()->isVariablyModifiedType()) {
11374 Size = CGF.getTypeSize(Ty: CVD->getType());
11375 // Align the size: ((size + align - 1) / align) * align
11376 Size = CGF.Builder.CreateNUWAdd(
11377 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
11378 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
11379 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
11380 } else {
11381 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
11382 Size = CGM.getSize(numChars: Sz.alignTo(Align));
11383 }
11384 llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
11385 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
11386 const Expr *Allocator = AA->getAllocator();
11387 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
11388 llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
11389 SmallVector<llvm::Value *, 4> Args;
11390 Args.push_back(Elt: ThreadID);
11391 if (Alignment)
11392 Args.push_back(Elt: Alignment);
11393 Args.push_back(Elt: Size);
11394 Args.push_back(Elt: AllocVal);
11395 llvm::omp::RuntimeFunction FnID =
11396 Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
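    // For example (illustrative): a variable declared with
    //   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc) align(64)
    // selects __kmpc_aligned_alloc(gtid, 64, size, allocator) here.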
11397 llvm::Value *Addr = CGF.EmitRuntimeCall(
11398 OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), Args,
11399 getName(Parts: {CVD->getName(), ".void.addr"}));
11400 llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
11401 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
11402 QualType Ty = CGM.getContext().getPointerType(CVD->getType());
11403 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11404 Addr, CGF.ConvertTypeForMem(T: Ty), getName(Parts: {CVD->getName(), ".addr"}));
11405 if (UntiedAddr.isValid())
11406 CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);
11407
11408 // Cleanup action for allocate support.
11409 class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
11410 llvm::FunctionCallee RTLFn;
11411 SourceLocation::UIntTy LocEncoding;
11412 Address Addr;
11413 const Expr *AllocExpr;
11414
11415 public:
11416 OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
11417 SourceLocation::UIntTy LocEncoding, Address Addr,
11418 const Expr *AllocExpr)
11419 : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
11420 AllocExpr(AllocExpr) {}
11421 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
11422 if (!CGF.HaveInsertPoint())
11423 return;
11424 llvm::Value *Args[3];
11425 Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
11426 CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
11427 Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11428 V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
11429 llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
11430 Args[2] = AllocVal;
11431 CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
11432 }
11433 };
11434 Address VDAddr =
11435 UntiedRealAddr.isValid()
11436 ? UntiedRealAddr
11437 : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
11438 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
11439 NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
11440 VDAddr, Allocator);
11441 if (UntiedRealAddr.isValid())
11442 if (auto *Region =
11443 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
11444 Region->emitUntiedSwitch(CGF);
11445 return VDAddr;
11446 }
11447 return UntiedAddr;
11448}
11449
11450bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11451 const VarDecl *VD) const {
11452 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
11453 if (It == FunctionToUntiedTaskStackMap.end())
11454 return false;
11455 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
11456}
11457
11458CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
11459 CodeGenModule &CGM, const OMPLoopDirective &S)
11460 : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
11461 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11462 if (!NeedToPush)
11463 return;
11464 NontemporalDeclsSet &DS =
11465 CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
11466 for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
11467 for (const Stmt *Ref : C->private_refs()) {
11468 const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
11469 const ValueDecl *VD;
11470 if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
11471 VD = DRE->getDecl();
11472 } else {
11473 const auto *ME = cast<MemberExpr>(SimpleRefExpr);
11474 assert((ME->isImplicitCXXThis() ||
11475 isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
11476 "Expected member of current class.");
11477 VD = ME->getMemberDecl();
11478 }
11479 DS.insert(VD);
11480 }
11481 }
11482}
11483
11484CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11485 if (!NeedToPush)
11486 return;
11487 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11488}
11489
11490CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11491 CodeGenFunction &CGF,
11492 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11493 std::pair<Address, Address>> &LocalVars)
11494 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11495 if (!NeedToPush)
11496 return;
11497 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11498 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11499 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
11500}
11501
11502CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11503 if (!NeedToPush)
11504 return;
11505 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11506}
11507
11508bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11509 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11510
11511 return llvm::any_of(
11512 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
11513 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
11514}
11515
11516void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11517 const OMPExecutableDirective &S,
11518 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11519 const {
11520 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11521 // Vars in target/task regions must be excluded completely.
11522 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
11523 isOpenMPTaskingDirective(S.getDirectiveKind())) {
11524 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11525 getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
11526 const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
11527 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11528 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11529 NeedToCheckForLPCs.insert(Cap.getCapturedVar());
11530 }
11531 }
11532 // Exclude vars in private clauses.
11533 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11534 for (const Expr *Ref : C->varlist()) {
11535 if (!Ref->getType()->isScalarType())
11536 continue;
11537 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11538 if (!DRE)
11539 continue;
11540 NeedToCheckForLPCs.insert(DRE->getDecl());
11541 }
11542 }
11543 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11544 for (const Expr *Ref : C->varlist()) {
11545 if (!Ref->getType()->isScalarType())
11546 continue;
11547 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11548 if (!DRE)
11549 continue;
11550 NeedToCheckForLPCs.insert(DRE->getDecl());
11551 }
11552 }
11553 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11554 for (const Expr *Ref : C->varlist()) {
11555 if (!Ref->getType()->isScalarType())
11556 continue;
11557 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11558 if (!DRE)
11559 continue;
11560 NeedToCheckForLPCs.insert(DRE->getDecl());
11561 }
11562 }
11563 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11564 for (const Expr *Ref : C->varlist()) {
11565 if (!Ref->getType()->isScalarType())
11566 continue;
11567 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11568 if (!DRE)
11569 continue;
11570 NeedToCheckForLPCs.insert(DRE->getDecl());
11571 }
11572 }
11573 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11574 for (const Expr *Ref : C->varlist()) {
11575 if (!Ref->getType()->isScalarType())
11576 continue;
11577 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
11578 if (!DRE)
11579 continue;
11580 NeedToCheckForLPCs.insert(DRE->getDecl());
11581 }
11582 }
11583 for (const Decl *VD : NeedToCheckForLPCs) {
11584 for (const LastprivateConditionalData &Data :
11585 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11586 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
11587 if (!Data.Disabled)
11588 NeedToAddForLPCsAsDisabled.insert(V: VD);
11589 break;
11590 }
11591 }
11592 }
11593}
11594
11595CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11596 CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11597 : CGM(CGF.CGM),
11598 Action((CGM.getLangOpts().OpenMP >= 50 &&
11599 llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
11600 P: [](const OMPLastprivateClause *C) {
11601 return C->getKind() ==
11602 OMPC_LASTPRIVATE_conditional;
11603 }))
11604 ? ActionToDo::PushAsLastprivateConditional
11605 : ActionToDo::DoNotPush) {
11606 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11607 if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
11608 return;
11609 assert(Action == ActionToDo::PushAsLastprivateConditional &&
11610 "Expected a push action.");
11611 LastprivateConditionalData &Data =
11612 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11613 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11614 if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11615 continue;
11616
11617 for (const Expr *Ref : C->varlist()) {
11618 Data.DeclToUniqueName.insert(std::make_pair(
11619 cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11620 SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
11621 }
11622 }
11623 Data.IVLVal = IVLVal;
11624 Data.Fn = CGF.CurFn;
11625}
11626
11627CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11628 CodeGenFunction &CGF, const OMPExecutableDirective &S)
11629 : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
11630 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11631 if (CGM.getLangOpts().OpenMP < 50)
11632 return;
11633 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
11634 tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
11635 if (!NeedToAddForLPCsAsDisabled.empty()) {
11636 Action = ActionToDo::DisableLastprivateConditional;
11637 LastprivateConditionalData &Data =
11638 CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11639 for (const Decl *VD : NeedToAddForLPCsAsDisabled)
11640 Data.DeclToUniqueName.try_emplace(Key: VD);
11641 Data.Fn = CGF.CurFn;
11642 Data.Disabled = true;
11643 }
11644}
11645
11646CGOpenMPRuntime::LastprivateConditionalRAII
11647CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11648 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11649 return LastprivateConditionalRAII(CGF, S);
11650}
11651
11652CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11653 if (CGM.getLangOpts().OpenMP < 50)
11654 return;
11655 if (Action == ActionToDo::DisableLastprivateConditional) {
11656 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11657 "Expected list of disabled private vars.");
11658 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11659 }
11660 if (Action == ActionToDo::PushAsLastprivateConditional) {
11661 assert(
11662 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11663 "Expected list of lastprivate conditional vars.");
11664 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11665 }
11666}
11667
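// For example (illustrative): for "int a" under lastprivate(conditional: a),
// the helper below builds "struct { int a; char Fired; }", zero-initializes
// Fired, and returns the address of the first field as the private copy of a.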
11668Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
11669 const VarDecl *VD) {
11670 ASTContext &C = CGM.getContext();
11671 auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
11672 QualType NewType;
11673 const FieldDecl *VDField;
11674 const FieldDecl *FiredField;
11675 LValue BaseLVal;
11676 auto VI = I->getSecond().find(VD);
11677 if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord(Name: "lastprivate.conditional");
11679 RD->startDefinition();
11680 VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
11681 FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
11682 RD->completeDefinition();
11683 NewType = C.getRecordType(Decl: RD);
11684 Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
11685 BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
11686 I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
11687 } else {
11688 NewType = std::get<0>(VI->getSecond());
11689 VDField = std::get<1>(VI->getSecond());
11690 FiredField = std::get<2>(VI->getSecond());
11691 BaseLVal = std::get<3>(VI->getSecond());
11692 }
11693 LValue FiredLVal =
11694 CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
11695 CGF.EmitStoreOfScalar(
11696 llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
11697 FiredLVal);
11698 return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
11699}
11700
11701namespace {
11702/// Checks if the lastprivate conditional variable is referenced in LHS.
11703class LastprivateConditionalRefChecker final
11704 : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11705 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11706 const Expr *FoundE = nullptr;
11707 const Decl *FoundD = nullptr;
11708 StringRef UniqueDeclName;
11709 LValue IVLVal;
11710 llvm::Function *FoundFn = nullptr;
11711 SourceLocation Loc;
11712
11713public:
11714 bool VisitDeclRefExpr(const DeclRefExpr *E) {
11715 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11716 llvm::reverse(C&: LPM)) {
11717 auto It = D.DeclToUniqueName.find(E->getDecl());
11718 if (It == D.DeclToUniqueName.end())
11719 continue;
11720 if (D.Disabled)
11721 return false;
11722 FoundE = E;
11723 FoundD = E->getDecl()->getCanonicalDecl();
11724 UniqueDeclName = It->second;
11725 IVLVal = D.IVLVal;
11726 FoundFn = D.Fn;
11727 break;
11728 }
11729 return FoundE == E;
11730 }
11731 bool VisitMemberExpr(const MemberExpr *E) {
11732 if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
11733 return false;
11734 for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11735 llvm::reverse(C&: LPM)) {
11736 auto It = D.DeclToUniqueName.find(E->getMemberDecl());
11737 if (It == D.DeclToUniqueName.end())
11738 continue;
11739 if (D.Disabled)
11740 return false;
11741 FoundE = E;
11742 FoundD = E->getMemberDecl()->getCanonicalDecl();
11743 UniqueDeclName = It->second;
11744 IVLVal = D.IVLVal;
11745 FoundFn = D.Fn;
11746 break;
11747 }
11748 return FoundE == E;
11749 }
11750 bool VisitStmt(const Stmt *S) {
11751 for (const Stmt *Child : S->children()) {
11752 if (!Child)
11753 continue;
11754 if (const auto *E = dyn_cast<Expr>(Val: Child))
11755 if (!E->isGLValue())
11756 continue;
11757 if (Visit(Child))
11758 return true;
11759 }
11760 return false;
11761 }
11762 explicit LastprivateConditionalRefChecker(
11763 ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11764 : LPM(LPM) {}
11765 std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
11766 getFoundData() const {
11767 return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
11768 }
11769};
11770} // namespace
11771
11772void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
11773 LValue IVLVal,
11774 StringRef UniqueDeclName,
11775 LValue LVal,
11776 SourceLocation Loc) {
11777 // Last updated loop counter for the lastprivate conditional var.
11778 // int<xx> last_iv = 0;
11779 llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
11780 llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
11781 Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
11782 cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
11783 IVLVal.getAlignment().getAsAlign());
11784 LValue LastIVLVal =
11785 CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());
11786
11787 // Last value of the lastprivate conditional.
11788 // decltype(priv_a) last_a;
11789 llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
11790 Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
11791 cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
11792 LVal.getAlignment().getAsAlign());
11793 LValue LastLVal =
11794 CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());
11795
11796 // Global loop counter. Required to handle inner parallel-for regions.
11797 // iv
11798 llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);
11799
11800 // #pragma omp critical(a)
11801 // if (last_iv <= iv) {
11802 // last_iv = iv;
11803 // last_a = priv_a;
11804 // }
11805 auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11806 Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
11807 Action.Enter(CGF);
11808 llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // Check if the variable was updated during a later iteration
    // (last_iv <= iv); if so, store the new value in the global vars.
11811 llvm::Value *CmpRes;
11812 if (IVLVal.getType()->isSignedIntegerType()) {
11813 CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
11814 } else {
11815 assert(IVLVal.getType()->isUnsignedIntegerType() &&
11816 "Loop iteration variable must be integer.");
11817 CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
11818 }
11819 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
11820 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
11821 CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
11822 // {
11823 CGF.EmitBlock(BB: ThenBB);
11824
11825 // last_iv = iv;
11826 CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);
11827
11828 // last_a = priv_a;
11829 switch (CGF.getEvaluationKind(T: LVal.getType())) {
11830 case TEK_Scalar: {
11831 llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
11832 CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
11833 break;
11834 }
11835 case TEK_Complex: {
11836 CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
11837 CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
11838 break;
11839 }
11840 case TEK_Aggregate:
11841 llvm_unreachable(
11842 "Aggregates are not supported in lastprivate conditional.");
11843 }
11844 // }
11845 CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit a line number for the unconditional branch.
11847 (void)ApplyDebugLocation::CreateEmpty(CGF);
11848 CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
11849 };
11850
11851 if (CGM.getLangOpts().OpenMPSimd) {
11852 // Do not emit as a critical region as no parallel region could be emitted.
11853 RegionCodeGenTy ThenRCG(CodeGen);
11854 ThenRCG(CGF);
11855 } else {
11856 emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
11857 }
11858}
11859
11860void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11861 const Expr *LHS) {
11862 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11863 return;
11864 LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
11865 if (!Checker.Visit(LHS))
11866 return;
11867 const Expr *FoundE;
11868 const Decl *FoundD;
11869 StringRef UniqueDeclName;
11870 LValue IVLVal;
11871 llvm::Function *FoundFn;
11872 std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
11873 Checker.getFoundData();
11874 if (FoundFn != CGF.CurFn) {
11875 // Special codegen for inner parallel regions.
11876 // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
11877 auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
11878 assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
11879 "Lastprivate conditional is not found in outer region.");
11880 QualType StructTy = std::get<0>(t&: It->getSecond());
11881 const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
11882 LValue PrivLVal = CGF.EmitLValue(E: FoundE);
11883 Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
11884 Addr: PrivLVal.getAddress(),
11885 Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
11886 ElementTy: CGF.ConvertTypeForMem(T: StructTy));
11887 LValue BaseLVal =
11888 CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
11889 LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
11890 CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
11891 CGF.ConvertTypeForMem(T: FiredDecl->getType()), 1)),
11892 FiredLVal, llvm::AtomicOrdering::Unordered,
11893 /*IsVolatile=*/true, /*isInit=*/false);
11894 return;
11895 }
11896
11897 // Private address of the lastprivate conditional in the current context.
11898 // priv_a
11899 LValue LVal = CGF.EmitLValue(E: FoundE);
11900 emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
11901 Loc: FoundE->getExprLoc());
11902}
11903
11904void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11905 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11906 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11907 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11908 return;
11909 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
11910 auto It = llvm::find_if(
11911 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
11912 if (It == Range.end() || It->Fn != CGF.CurFn)
11913 return;
11914 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
11915 assert(LPCI != LastprivateConditionalToTypes.end() &&
11916 "Lastprivates must be registered already.");
11917 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11918 getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
11919 const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
11920 for (const auto &Pair : It->DeclToUniqueName) {
11921 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
11922 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
11923 continue;
11924 auto I = LPCI->getSecond().find(Val: Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
11927 // bool Cmp = priv_a.Fired != 0;
11928 LValue BaseLVal = std::get<3>(t&: I->getSecond());
11929 LValue FiredLVal =
11930 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
11931 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
11932 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
11933 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
11934 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
11935 // if (Cmp) {
11936 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
11937 CGF.EmitBlock(BB: ThenBB);
11938 Address Addr = CGF.GetAddrOfLocalVar(VD);
11939 LValue LVal;
11940 if (VD->getType()->isReferenceType())
11941 LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
11942 AlignmentSource::Decl);
11943 else
11944 LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
11945 AlignmentSource::Decl);
11946 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
11947 Loc: D.getBeginLoc());
11948 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
11950 // }
11951 }
11952}
11953
11954void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11955 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11956 SourceLocation Loc) {
11957 if (CGF.getLangOpts().OpenMP < 50)
11958 return;
11959 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
11960 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11961 "Unknown lastprivate conditional variable.");
11962 StringRef UniqueName = It->second;
11963 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
11964 // The variable was not updated in the region - exit.
11965 if (!GV)
11966 return;
11967 LValue LPLVal = CGF.MakeRawAddrLValue(
11968 V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
11969 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
11970 CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
11971}
11972
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

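// Reductions are the one construct that still needs real lowering here: a
// reduction on a lone 'simd' loop is emitted as a simple reduction (plain
// loads/stores, no runtime calls) by the base-class implementation.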
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

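// Offloading is disabled in SIMD-only mode: 'target' constructs are emitted
// as host code without going through the offloading entry points below.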
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

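// Returning false reports that the global was not handled here, so the
// caller falls back to the normal host emission path.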
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
