1//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This provides a class for OpenMP runtime code generation.
10//
11//===----------------------------------------------------------------------===//
12
13#include "CGOpenMPRuntime.h"
14#include "ABIInfoImpl.h"
15#include "CGCXXABI.h"
16#include "CGCleanup.h"
17#include "CGDebugInfo.h"
18#include "CGRecordLayout.h"
19#include "CodeGenFunction.h"
20#include "TargetInfo.h"
21#include "clang/AST/APValue.h"
22#include "clang/AST/Attr.h"
23#include "clang/AST/Decl.h"
24#include "clang/AST/OpenMPClause.h"
25#include "clang/AST/StmtOpenMP.h"
26#include "clang/AST/StmtVisitor.h"
27#include "clang/Basic/OpenMPKinds.h"
28#include "clang/Basic/SourceManager.h"
29#include "clang/CodeGen/ConstantInitBuilder.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/SmallVector.h"
32#include "llvm/ADT/StringExtras.h"
33#include "llvm/Bitcode/BitcodeReader.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/GlobalValue.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Value.h"
39#include "llvm/Support/AtomicOrdering.h"
40#include "llvm/Support/raw_ostream.h"
41#include <cassert>
42#include <cstdint>
43#include <numeric>
44#include <optional>
45
46using namespace clang;
47using namespace CodeGen;
48using namespace llvm::omp;
49
50namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  /// Constructor for regions backed by a captured statement \p CS.
  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Constructor for regions that have no associated captured statement
  /// (e.g. inlined regions).
  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  /// Emit a task-switching point for 'untied' tasks; no-op by default,
  /// overridden by task-outlined regions.
  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  /// True if the region may contain an OpenMP 'cancel' construct.
  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  /// Kind of this region (outlined parallel/task, inlined, or target).
  CGOpenMPRegionKind RegionKind;
  /// Code generation sequence used to emit the region body.
  RegionCodeGenTy CodeGen;
  /// OpenMP directive that introduced this region.
  OpenMPDirectiveKind Kind;
  /// True if a 'cancel' directive may appear in the region.
  bool HasCancel;
};
112
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// \param ThreadIDVar Variable/parameter carrying the global thread id;
  ///        must not be null (asserted below).
  /// \param HelperName Name to use for the outlined helper function.
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Name used for the outlined helper function.
  StringRef HelperName;
};
145
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  /// Pre/post action implementing the switch-based resume machinery for
  /// 'untied' tasks: every task part gets its own case in a switch over the
  /// part id so that a re-invoked task resumes where it previously yielded.
  class UntiedTaskActionTy final : public PrePostActionTy {
    /// True if the task is untied (note: constructed from \p Tied, negated).
    bool Untied;
    /// Variable holding the current part id of the task.
    const VarDecl *PartIDVar;
    /// Extra codegen to run at every task-switching point.
    const RegionCodeGenTy UntiedCodeGen;
    /// Switch over the part id; built in Enter() for untied tasks only.
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(lvalue: PartIdLVal, Loc: PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: ".untied.done.");
        // Default destination: unknown part id means the task is done.
        UntiedSwitch = CGF.Builder.CreateSwitch(V: Res, Dest: DoneBB);
        CGF.EmitBlock(BB: DoneBB);
        CGF.EmitBranchThroughCleanup(Dest: CGF.ReturnBlock);
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        // Part id 0 resumes at the very beginning of the task body.
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: 0),
                              Dest: CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    /// Emit one task-switching point: store the next part id, run the
    /// user-supplied codegen, exit through cleanups, and register a new
    /// resume case in the switch.
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            Ptr: CGF.GetAddrOfLocalVar(VD: PartIDVar),
            PtrTy: PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              lvalue: PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(Name: ".untied.next.");
        CGF.EmitBranch(Block: CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(BB: CGF.createBasicBlock(name: ".untied.jmp."));
        UntiedSwitch->addCase(OnVal: CGF.Builder.getInt32(C: UntiedSwitch->getNumCases()),
                              Dest: CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(Dest: CurPoint);
        CGF.EmitBlock(BB: CurPoint.getBlock());
      }
    }
    /// Number of task parts emitted so far (== number of switch cases).
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
234
/// API for inlined captured statement code generation in OpenMP
/// constructs. Most queries forward to the enclosing (outer) region info,
/// if there is one.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(Val: OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    // Note: deliberately queries the raw old CSI (not just OuterRegionInfo)
    // so any enclosing captured-statement info can supply the name.
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  /// OldCSI downcast to a CGOpenMPRegionInfo, or null if it is not one.
  CGOpenMPRegionInfo *OuterRegionInfo;
};
317
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of
/// the captured fields. The name of the target region has to be unique in a
/// given application so it is provided by the client, because only the client
/// has the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Val: Info)->getRegionKind() == TargetRegion;
  }

private:
  /// Client-provided, application-unique name of the target region.
  StringRef HelperName;
};
346
/// Placeholder codegen callback for regions that must never emit a body
/// (used by CGOpenMPInnerExprInfo, which only captures expressions).
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      // Locals and parameters are already local; only globals need
      // privatization here.
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(E: &DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    // Forward to the outer region; variables privatized in the constructor
    // are not captured fields and yield nullptr.
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
408
/// RAII for emitting code of OpenMP constructs. Installs a
/// CGOpenMPInlinedRegionInfo as the current CapturedStmtInfo and, when
/// \p NoInheritance is set, temporarily clears lambda/block capture state so
/// the inlined region does not see the enclosing function's captures.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  /// Saved lambda capture map (swapped out when NoInheritance is set).
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  /// Saved lambda 'this' capture field.
  FieldDecl *LambdaThisCaptureField = nullptr;
  /// Saved block info.
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  /// Whether capture state was saved/cleared and must be restored.
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(Val: CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(a&: CGF.LambdaCaptureFields, b&: LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
451
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive (same value as
  /// OMP_IDENT_BARRIER_IMPL, mirroring kmp.h).
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
480
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
521
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  /// Alias for the schedule used when none is specified.
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
553
554/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
555/// region.
556class CleanupTy final : public EHScopeStack::Cleanup {
557 PrePostActionTy *Action;
558
559public:
560 explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
561 void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
562 if (!CGF.HaveInsertPoint())
563 return;
564 Action->Exit(CGF);
565 }
566};
567
568} // anonymous namespace
569
570void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
571 CodeGenFunction::RunCleanupsScope Scope(CGF);
572 if (PrePostAction) {
573 CGF.EHStack.pushCleanup<CleanupTy>(Kind: NormalAndEHCleanup, A: PrePostAction);
574 Callback(CodeGen, CGF, *PrePostAction);
575 } else {
576 PrePostActionTy Action;
577 Callback(CodeGen, CGF, Action);
578 }
579}
580
581/// Check if the combiner is a call to UDR combiner and if it is so return the
582/// UDR decl used for reduction.
583static const OMPDeclareReductionDecl *
584getReductionInit(const Expr *ReductionOp) {
585 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
586 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
587 if (const auto *DRE =
588 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
589 if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl()))
590 return DRD;
591 return nullptr;
592}
593
/// Initialize a reduction private copy at \p Private from the original value
/// at \p Original. If the user-defined reduction \p DRD has an 'initializer'
/// clause, emit a call to its initializer function; otherwise store a
/// zero/null constant of type \p Ty into \p Private.
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    // Map the UDR initializer's omp_priv/omp_orig placeholder decls onto the
    // private/original addresses, then emit InitOp as a call to the
    // initializer function (Reduction.second).
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
    const auto *CE = cast<CallExpr>(Val: InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(Val: CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(Val: cast<UnaryOperator>(Val: RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: LHSDRE->getDecl()), Addr: Private);
    PrivateScope.addPrivate(LocalVD: cast<VarDecl>(Val: RHSDRE->getDecl()), Addr: Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(V: Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(E: InitOp);
  } else {
    // No UDR initializer: materialize a null constant in a private global and
    // copy it into the private address.
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(T: Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName(Parts: {"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(V: GV, T: Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(T: Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(V: LV, Loc: DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(C: CGF.EmitLoadOfComplex(src: LV, loc: DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      // Aggregates are copied directly as an lvalue; no rvalue round-trip.
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(E: &OVE, Location: Private, Quals: Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
647
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param EmitDeclareReductionInit If true, initialize each element via the
///        user-defined reduction initializer instead of \p Init directly.
/// \param Init Initial expression of array.
/// \param DRD Declare-reduction declaration, if any (enables copying from
///        \p SrcAddr).
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(Ty: DestAddr.getElementType(), Ptr: DestBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs track the current source/destination element across iterations.
  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2,
                                          Name: "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, InitOp: Init, Private: DestElementCurrent,
                                       Original: SrcElementCurrent, Ty: ElementTy);
    } else
      CGF.EmitAnyExprToMem(E: Init, Location: DestElementCurrent, Quals: ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    // NOTE(review): the value name says "dest" but this advances the *source*
    // pointer; cosmetic only, does not affect codegen semantics.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, /*Idx0=*/1,
        Name: "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(V: SrcElementNext, BB: CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: DestAddr.getElementType(), Ptr: DestElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  DestElementPHI->addIncoming(V: DestElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
736
/// Emit an lvalue for the shared (original) reduction expression \p E.
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}
740
741LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
742 const Expr *E) {
743 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E))
744 return CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false);
745 return LValue();
746}
747
/// Emit initialization of the private aggregate copy for reduction item
/// \p N at \p PrivateAddr, reading from \p SharedAddr when the user-defined
/// reduction initializer needs the original value.
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  // Prefer the declare-reduction initializer when one exists or when the
  // private decl itself has no initializer; otherwise use the decl's init.
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, DestAddr: PrivateAddr, Type: PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       Init: EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SrcAddr: SharedAddr);
}
764
765ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
766 ArrayRef<const Expr *> Origs,
767 ArrayRef<const Expr *> Privates,
768 ArrayRef<const Expr *> ReductionOps) {
769 ClausesData.reserve(N: Shareds.size());
770 SharedAddresses.reserve(N: Shareds.size());
771 Sizes.reserve(N: Shareds.size());
772 BaseDecls.reserve(N: Shareds.size());
773 const auto *IOrig = Origs.begin();
774 const auto *IPriv = Privates.begin();
775 const auto *IRed = ReductionOps.begin();
776 for (const Expr *Ref : Shareds) {
777 ClausesData.emplace_back(Args&: Ref, Args: *IOrig, Args: *IPriv, Args: *IRed);
778 std::advance(i&: IOrig, n: 1);
779 std::advance(i&: IPriv, n: 1);
780 std::advance(i&: IRed, n: 1);
781 }
782}
783
784void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
785 assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
786 "Number of generated lvalues must be exactly N.");
787 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Shared);
788 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Shared);
789 SharedAddresses.emplace_back(Args&: First, Args&: Second);
790 if (ClausesData[N].Shared == ClausesData[N].Ref) {
791 OrigAddresses.emplace_back(Args&: First, Args&: Second);
792 } else {
793 LValue First = emitSharedLValue(CGF, E: ClausesData[N].Ref);
794 LValue Second = emitSharedLValueUB(CGF, E: ClausesData[N].Ref);
795 OrigAddresses.emplace_back(Args&: First, Args&: Second);
796 }
797}
798
/// Compute and record the size of reduction item \p N. For non-VLA types the
/// size is a simple getTypeSize; for variably modified types the element
/// count is computed (from the array-section bounds when applicable) and
/// bound to the VLA size expression so the private type can be emitted.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(Val: ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    // Fixed-size type: store (size-in-bytes, nullptr); no element count
    // is needed.
    Sizes.emplace_back(
        Args: CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType()),
        Args: nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(Ty: ElemType);
  if (AsArraySection) {
    // Element count = (UB - LB) + 1; size in chars = count * sizeof(elem).
    Size = CGF.Builder.CreatePtrDiff(ElemTy: ElemType,
                                     LHS: OrigAddresses[N].second.getPointer(CGF),
                                     RHS: OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        LHS: Size, RHS: llvm::ConstantInt::get(Ty: Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(LHS: Size, RHS: ElemSizeOf);
  } else {
    // Whole-variable VLA: derive element count from total size.
    SizeInChars =
        CGF.getTypeSize(Ty: OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(LHS: SizeInChars, RHS: ElemSizeOf);
  }
  Sizes.emplace_back(Args&: SizeInChars, Args&: Size);
  // Bind the computed element count to the VLA size expression so
  // EmitVariablyModifiedType can materialize the private type.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
832
// Re-emits the variably-modified private type of reduction item N using an
// externally supplied element count (e.g. one recovered from a task context).
// For constant-sized items this is a no-op and Size must be null.
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  // Bind the provided element count to the VLA size expression while the
  // type is re-emitted.
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          Val: CGF.getContext().getAsVariableArrayType(T: PrivateType)->getSizeExpr()),
      RValue::get(V: Size));
  CGF.EmitVariablyModifiedType(Ty: PrivateType);
}
849
// Emits the initializer for the private copy of reduction item N. Array-typed
// items get element-wise aggregate initialization; items backed by a
// user-defined reduction use its initializer; otherwise the private
// variable's own (non-trivial) initializer is emitted into PrivateAddr.
// DefaultInit is invoked first where required and may itself complete the
// initialization (its result gates the fallback path).
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(T: PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, InitOp: ClausesData[N].ReductionOp,
                                     Private: PrivateAddr, Original: SharedAddr, Ty: SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(Init: PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(E: PrivateVD->getInit(), Location: PrivateAddr,
                         Quals: PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
874
875bool ReductionCodeGen::needCleanups(unsigned N) {
876 QualType PrivateType = getPrivateType(N);
877 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
878 return DTorKind != QualType::DK_none;
879}
880
881void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
882 Address PrivateAddr) {
883 QualType PrivateType = getPrivateType(N);
884 QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
885 if (needCleanups(N)) {
886 PrivateAddr =
887 PrivateAddr.withElementType(ElemTy: CGF.ConvertTypeForMem(T: PrivateType));
888 CGF.pushDestroy(dtorKind: DTorKind, addr: PrivateAddr, type: PrivateType);
889 }
890}
891
// Follows a chain of pointers/references starting at BaseLV until the loaded
// value's type matches ElTy, loading through each level. The final lvalue is
// retyped to ElTy's memory representation so it addresses the first element
// of the underlying data.
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(Ptr: BaseLV.getAddress(), PtrTy);
    } else {
      // Reference: load the referenced address.
      LValue RefLVal = CGF.MakeAddrLValue(Addr: BaseLV.getAddress(), T: BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      Addr: BaseLV.getAddress().withElementType(ElemTy: CGF.ConvertTypeForMem(T: ElTy)),
      T: BaseLV.getType(), BaseInfo: BaseLV.getBaseInfo(),
      TBAAInfo: CGF.CGM.getTBAAInfoForSubobject(Base: BaseLV, AccessType: BaseLV.getType()));
}
910
// Rebuilds the pointer/reference indirection chain of BaseTy around Addr:
// for each pointer/reference level (until ElTy is reached) a stack temporary
// is created, each one storing the address of the next. Addr is stored into
// the innermost temporary and the outermost is returned; if BaseTy has no
// such levels, Addr is cast to the original base address's type instead.
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(T1: BaseTy, T2: ElTy)) {
    Tmp = CGF.CreateMemTemp(T: BaseTy);
    if (TopTmp.isValid())
      // Chain: previous temporary points to the new one.
      CGF.Builder.CreateStore(Val: Tmp.getPointer(), Addr: TopTmp);
    else
      // First temporary created becomes the chain head to return.
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    // Store the adjusted address into the innermost temporary.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: Tmp.getElementType());
    CGF.Builder.CreateStore(Val: Addr, Addr: Tmp);
    return MostTopTmp;
  }

  // No indirection levels: reuse the original base address with the new
  // pointer value.
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(NewPointer: Addr, IsKnownNonNull: NotKnownNonNull);
}
939
940static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
941 const VarDecl *OrigVD = nullptr;
942 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: Ref)) {
943 const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
944 while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Val: Base))
945 Base = TempOASE->getBase()->IgnoreParenImpCasts();
946 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
947 Base = TempASE->getBase()->IgnoreParenImpCasts();
948 DE = cast<DeclRefExpr>(Val: Base);
949 OrigVD = cast<VarDecl>(Val: DE->getDecl());
950 } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Ref)) {
951 const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
952 while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Val: Base))
953 Base = TempASE->getBase()->IgnoreParenImpCasts();
954 DE = cast<DeclRefExpr>(Val: Base);
955 OrigVD = cast<VarDecl>(Val: DE->getDecl());
956 }
957 return OrigVD;
958}
959
// For reduction items expressed as array sections/subscripts, rebases the
// private address so it mimics the original's offset from its base variable:
// the private pointer is shifted by (shared begin - base begin) elements and
// the base's indirection chain is reconstructed around it. Plain items are
// returned unchanged. Also records the item's base VarDecl.
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(Ref: ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(Args&: OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(E: DE);
    // Walk down to the first element of the underlying data.
    LValue BaseLValue =
        loadToBegin(CGF, BaseTy: OrigVD->getType(), ElTy: SharedAddresses[N].first.getType(),
                    BaseLV: OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    // Element distance between the base begin and the shared section begin.
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        ElemTy: SharedAddr.getElementType(), LHS: BaseLValue.getPointer(CGF),
        RHS: SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: PrivateAddr.emitRawPointer(CGF), DestTy: SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        Ty: SharedAddr.getElementType(), Ptr: PrivatePointer, IdxList: Adjustment);
    return castToBase(CGF, BaseTy: OrigVD->getType(),
                      ElTy: SharedAddresses[N].first.getType(),
                      OriginalBaseAddress: OriginalBaseLValue.getAddress(), Addr: Ptr);
  }
  BaseDecls.emplace_back(
      Args: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
986
987bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
988 const OMPDeclareReductionDecl *DRD =
989 getReductionInit(ReductionOp: ClausesData[N].ReductionOp);
990 return DRD && DRD->getInitializer();
991}
992
993LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
994 return CGF.EmitLoadOfPointerLValue(
995 Ptr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
996 PtrTy: getThreadIDVariable()->getType()->castAs<PointerType>());
997}
998
// Emits the body of an OpenMP region inside a terminate scope, so that any
// exception attempting to escape the structured block terminates instead.
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
1013
1014LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
1015 CodeGenFunction &CGF) {
1016 return CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: getThreadIDVariable()),
1017 T: getThreadIDVariable()->getType(),
1018 Source: AlignmentSource::Decl);
1019}
1020
1021static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
1022 QualType FieldTy) {
1023 auto *Field = FieldDecl::Create(
1024 C, DC, StartLoc: SourceLocation(), IdLoc: SourceLocation(), /*Id=*/nullptr, T: FieldTy,
1025 TInfo: C.getTrivialTypeSourceInfo(T: FieldTy, Loc: SourceLocation()),
1026 /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
1027 Field->setAccess(AS_public);
1028 DC->addDecl(D: Field);
1029 return Field;
1030}
1031
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  // kmp_critical_name is an array of 8 i32.
  KmpCriticalNameTy = llvm::ArrayType::get(ElementType: CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  // Initialize the builder and, on the device side, load the host IR's
  // offload metadata before installing the config.
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(HostFilePath: CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
1053
1054void CGOpenMPRuntime::clear() {
1055 InternalVars.clear();
1056 // Clean non-target variable declarations possibly used only in debug info.
1057 for (const auto &Data : EmittedNonTargetVariables) {
1058 if (!Data.getValue().pointsToAliveValue())
1059 continue;
1060 auto *GV = dyn_cast<llvm::GlobalVariable>(Val: Data.getValue());
1061 if (!GV)
1062 continue;
1063 if (!GV->isDeclaration() || GV->getNumUses() > 0)
1064 continue;
1065 GV->eraseFromParent();
1066 }
1067}
1068
1069std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
1070 return OMPBuilder.createPlatformSpecificName(Parts);
1071}
1072
// Emits the outlined combiner or initializer function for a user-defined
// reduction: "void .omp_combiner.(Ty *in, Ty *out)" (or .omp_initializer.).
// The declared omp_in/omp_out (or omp_orig/omp_priv) variables are privatized
// to the dereferenced parameters, then the combiner/initializer expression is
// emitted. For initializers without a Call-style expression, the out
// variable's own initializer is emitted instead.
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(T: Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(Elt: &OmpOutParm);
  Args.push_back(Elt: &OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      Parts: {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  if (CGM.getLangOpts().Optimize) {
    // Helpers are tiny; force inlining in optimized builds.
    Fn->removeFnAttr(Kind: llvm::Attribute::NoInline);
    Fn->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc: In->getLocation(),
                    StartLoc: Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(VD: &OmpInParm);
  Scope.addPrivate(
      LocalVD: In, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrIn, PtrTy: PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(VD: &OmpOutParm);
  Scope.addPrivate(
      LocalVD: Out, Addr: CGF.EmitLoadOfPointerLValue(Ptr: AddrOut, PtrTy: PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  // Initializer case without a call-style init: emit omp_priv's own
  // non-trivial initializer.
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Init: Out->getInit())) {
    CGF.EmitAnyExprToMem(E: Out->getInit(), Location: CGF.GetAddrOfLocalVar(VD: Out),
                         Quals: Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(E: CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
1127
// Emits (once per declaration) the combiner and optional initializer
// functions for a user-defined reduction and caches them in UDRMap. When
// emitted within a function, the declaration is also recorded in
// FunctionUDRMap so the cache can be invalidated in functionFinished().
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(Val: D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, Ty: D->getType(), CombinerInitializer: D->getCombiner(),
      In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerIn())->getDecl()),
      Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    // Only Call-style initializers pass the expression through; otherwise
    // the priv variable's own initializer is used inside the helper.
    Initializer = emitCombinerOrInitializer(
        CGM, Ty: D->getType(),
        CombinerInitializer: D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                    : nullptr,
        In: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitOrig())->getDecl()),
        Out: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(Key: D, Args&: Combiner, Args&: Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(Elt: D);
}
1151
1152std::pair<llvm::Function *, llvm::Function *>
1153CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
1154 auto I = UDRMap.find(Val: D);
1155 if (I != UDRMap.end())
1156 return I->second;
1157 emitUserDefinedReduction(/*CGF=*/nullptr, D);
1158 return UDRMap.lookup(Val: D);
1159}
1160
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  // Pushes a finalization callback for the given directive kind onto the
  // builder's stack; pops it on destruction. No-op when no builder is given.
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clangs cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop an FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(Kind: OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({.FiniCB: FiniCB, .DK: Kind, .IsCancellable: HasCancel});
    OMPBuilder->pushFinalizationCB(FI: std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  // Null when no OpenMPIRBuilder is in use; then both ctor and dtor no-op.
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
1206
// Outlines the captured statement of a 'parallel' or 'teams' region into a
// function named \p OutlinedHelperName. The directive's cancel status is
// detected (the directive classes share no common hasCancel() interface,
// hence the dyn_cast chain) and forwarded both to the region info and,
// temporarily, to the OpenMPIRBuilder's finalization stack.
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(Val: &D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(S: *CS, Loc: D.getBeginLoc());
}
1243
1244std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
1245 std::string Suffix = getName(Parts: {"omp_outlined"});
1246 return (Name + Suffix).str();
1247}
1248
1249std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
1250 return getOutlinedHelperName(Name: CGF.CurFn->getName());
1251}
1252
1253std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
1254 std::string Suffix = getName(Parts: {"omp", "reduction", "reduction_func"});
1255 return (Name + Suffix).str();
1256}
1257
1258llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
1259 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1260 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1261 const RegionCodeGenTy &CodeGen) {
1262 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_parallel);
1263 return emitParallelOrTeamsOutlinedFunction(
1264 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1265 CodeGen);
1266}
1267
1268llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
1269 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1270 const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
1271 const RegionCodeGenTy &CodeGen) {
1272 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: OMPD_teams);
1273 return emitParallelOrTeamsOutlinedFunction(
1274 CGM, D, CS, ThreadIDVar, InnermostKind, OutlinedHelperName: getOutlinedHelperName(CGF),
1275 CodeGen);
1276}
1277
// Outlines the body of a task/taskloop region. For untied tasks an action is
// installed that re-enqueues the task (via __kmpc_omp_task) at task
// scheduling points, and the number of generated task parts is reported
// through \p NumberOfParts.
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  // Codegen for re-enqueueing an untied task: call
  // __kmpc_omp_task(loc, tid, task_t).
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, Loc: D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(Ptr: CGF.GetAddrOfLocalVar(VD: TaskTVar),
                                    PtrTy: TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                        args: TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) ? OMPD_taskloop
                                                        : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(RegionKind: Region);
  bool HasCancel = false;
  // The task directive classes share no common hasCancel() interface.
  if (const auto *TD = dyn_cast<OMPTaskDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(Val: &D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(S: *CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
1324
// Creates the "service" insert point for the current function: a dead
// placeholder bitcast instruction before which thread-id/location runtime
// calls are emitted. It is placed either at the current insertion point or
// right after the alloca insert point, and removed again by
// clearLocThreadIdInsertPt().
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  // The placeholder is an i32->i32 bitcast of undef; it has no effect and
  // only serves as a stable position marker.
  llvm::Value *Undef = llvm::UndefValue::get(T: CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(InsertPos: CGF.AllocaInsertPt->getIterator());
  }
}
1339
1340void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
1341 auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
1342 if (Elem.ServiceInsertPt) {
1343 llvm::Instruction *Ptr = Elem.ServiceInsertPt;
1344 Elem.ServiceInsertPt = nullptr;
1345 Ptr->eraseFromParent();
1346 }
1347}
1348
// Builds the ident_t source-location string ";file;function;line;col;;" into
// \p Buffer and returns a StringRef into it (valid only while Buffer lives).
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";";
  // Respect debug-prefix-map remapping when debug info is enabled.
  if (auto *DbgInfo = CGF.getDebugInfo())
    OS << DbgInfo->remapDIPath(PLoc.getFilename());
  else
    OS << PLoc.getFilename();
  OS << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
1366
// Returns an ident_t * describing \p Loc for OpenMP runtime calls. Without
// debug info (unless \p EmitLoc forces it) or for invalid locations, a
// default ";unknown;..." string is used; otherwise the presumed file,
// function, line and column are encoded.
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    std::string FileName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(Val: CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    // Respect debug-prefix-map remapping when debug info is enabled.
    if (auto *DbgInfo = CGF.getDebugInfo())
      FileName = DbgInfo->remapDIPath(PLoc.getFilename());
    else
      FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, Flags: llvm::omp::IdentFlag(Flags), Reserve2Flags: Reserved2Flags);
}
1395
// Returns the OpenMP global thread id for the current function, caching the
// value per function. Inside outlined regions the id is loaded from the
// thread-id parameter (subject to exception-safety restrictions on where the
// load may happen); otherwise __kmpc_global_thread_num is called at the
// service insert point.
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(Loc: CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        LocStr: getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        Ident: OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(Val: CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      // With C++ exceptions, only load the parameter when the load provably
      // happens in the entry block or alongside the pointer's definition.
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(Val: LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(Val: LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      Callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                             FnID: OMPRTL___kmpc_global_thread_num),
      Args: emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
1463
1464void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
1465 assert(CGF.CurFn && "No function in current CodeGenFunction.");
1466 if (OpenMPLocThreadIDMap.count(Val: CGF.CurFn)) {
1467 clearLocThreadIdInsertPt(CGF);
1468 OpenMPLocThreadIDMap.erase(Val: CGF.CurFn);
1469 }
1470 if (auto I = FunctionUDRMap.find(Val: CGF.CurFn); I != FunctionUDRMap.end()) {
1471 for (const auto *D : I->second)
1472 UDRMap.erase(Val: D);
1473 FunctionUDRMap.erase(I);
1474 }
1475 if (auto I = FunctionUDMMap.find(Val: CGF.CurFn); I != FunctionUDMMap.end()) {
1476 for (const auto *D : I->second)
1477 UDMMap.erase(Val: D);
1478 FunctionUDMMap.erase(I);
1479 }
1480 LastprivateConditionalToTypes.erase(Val: CGF.CurFn);
1481 FunctionToUntiedTaskStackMap.erase(Val: CGF.CurFn);
1482}
1483
// Returns the pointer type used for ident_t * parameters of runtime calls.
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}
1487
1488static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
1489convertDeviceClause(const VarDecl *VD) {
1490 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
1491 OMPDeclareTargetDeclAttr::getDeviceType(VD);
1492 if (!DevTy)
1493 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1494
1495 switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
1496 case OMPDeclareTargetDeclAttr::DT_Host:
1497 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
1498 break;
1499 case OMPDeclareTargetDeclAttr::DT_NoHost:
1500 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
1501 break;
1502 case OMPDeclareTargetDeclAttr::DT_Any:
1503 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
1504 break;
1505 default:
1506 return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
1507 break;
1508 }
1509}
1510
1511static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
1512convertCaptureClause(const VarDecl *VD) {
1513 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
1514 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
1515 if (!MapType)
1516 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1517 switch ((int)*MapType) { // Avoid -Wcovered-switch-default
1518 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
1519 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
1520 break;
1521 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
1522 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
1523 break;
1524 case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
1525 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
1526 break;
1527 default:
1528 return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
1529 break;
1530 }
1531}
1532
// Builds a unique target-region entry info (file id + line) for \p BeginLoc.
// The file lookup callback falls back to ignoring #line directives when the
// presumed file name cannot be resolved on disk.
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(Loc: BeginLoc);

    // If the presumed file (possibly from a #line directive) does not exist,
    // retry with line directives disabled.
    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(Path: PLoc.getFilename(), Result&: ID)) {
      PLoc = SM.getPresumedLoc(Loc: BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(CallBack: FileInfoCallBack, ParentName);
}
1551
// Return the address to use when referencing the 'declare target' variable
// VD, delegating to the OMPIRBuilder (which may create link/ref globals).
// Returns an invalid ConstantAddress when no special address is needed.
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  // Deferred callbacks: only evaluated by the OMPIRBuilder if it actually
  // needs the host global's address or linkage.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  // Receives any reference globals the builder generates; not consumed here.
  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  // The entity handed back is a pointer to the variable's storage.
  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      T: CGM.getContext().getPointerType(T: VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, GlobalInitializer: AddrOfGlobal,
      VariableLinkage: LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(D: VD));
}
1577
1578llvm::Constant *
1579CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
1580 assert(!CGM.getLangOpts().OpenMPUseTLS ||
1581 !CGM.getContext().getTargetInfo().isTLSSupported());
1582 // Lookup the entry, lazily creating it if necessary.
1583 std::string Suffix = getName(Parts: {"cache", ""});
1584 return OMPBuilder.getOrCreateInternalVariable(
1585 Ty: CGM.Int8PtrPtrTy, Name: Twine(CGM.getMangledName(GD: VD)).concat(Suffix).str());
1586}
1587
// Return the address of the current thread's copy of the threadprivate
// variable VD located at VDAddr.
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  // With TLS-based lowering the global itself is already per-thread storage,
  // so no runtime call is required.
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  // Build __kmpc_threadprivate_cached(&loc, gtid, &var, size, &cache); the
  // runtime returns the address of this thread's copy.
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy),
      CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
          args: Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
1609
// Register constructor/copy-constructor/destructor thunks for the
// threadprivate variable at VDAddr with the OpenMP runtime.
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_global_thread_num),
                      args: OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_register),
      args: Args);
}
1630
// Emit registration of ctor/dtor thunks for a threadprivate variable
// definition.
//
// Does nothing under TLS-based lowering. Otherwise, for the variable's
// definition (registered at most once per mangled name), synthesizes:
//   - a ctor thunk (C++ with an initializer only) that runs the declared
//     initializer into the per-thread copy passed to it, and
//   - a dtor thunk (destructible types only) that destroys that copy,
// then registers them via emitThreadPrivateVarInit. When called outside any
// function (CGF == nullptr) the registration is wrapped in a fresh
// __omp_threadprivate_init_ function which is returned; otherwise returns
// nullptr.
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(C&: CGM.getContext());
  // Only emit once per variable: the set insert fails on repeats.
  if (VD && ThreadPrivateWithDefinition.insert(key: CGM.getMangledName(GD: VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of the per-thread copy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidPtrTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      CtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidPtrTy, Fn, FnInfo: FI,
                            Args, Loc, StartLoc: Loc);
      // Load the destination pointer, run the initializer into it...
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(T: ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(E: Init, Location: Arg, Quals: Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      // ...then return that same pointer back to the caller.
      ArgVal = CtorCGF.EmitLoadOfScalar(
          Addr: CtorCGF.GetAddrOfLocalVar(VD: &Dst), /*Volatile=*/false,
          Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      CtorCGF.Builder.CreateStore(Val: ArgVal, Addr: CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate copy
      // of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      // Single void* parameter: the address of the per-thread copy to destroy.
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(Elt: &Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          resultType: CGM.getContext().VoidTy, args: Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(Info: FI);
      std::string Name = getName(Parts: {"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(ty: FTy, name: Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(CGF&: DtorCGF);
      DtorCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn, FnInfo: FI, Args,
                            Loc, StartLoc: Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(CGF&: DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          Addr: DtorCGF.GetAddrOfLocalVar(VD: &Dst),
          /*Volatile=*/false, Ty: CGM.getContext().VoidPtrTy, Loc: Dst.getLocation());
      DtorCGF.emitDestroy(
          addr: Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), type: ASTTy,
          destroyer: DtorCGF.getDestroyer(destructionKind: ASTTy.isDestructedType()),
          useEHCleanupForArray: DtorCGF.needsEHCleanup(kind: ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(Ty: CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(Ty: CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(Ty: CGM.UnqualPtrTy);
    }
    if (!CGF) {
      // No enclosing function: wrap the registration in a dedicated init
      // function the caller can schedule (e.g. as a global initializer).
      auto *InitFunctionTy =
          llvm::FunctionType::get(Result: CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName(Parts: {"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          ty: InitFunctionTy, name: Name, FI: CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GD: GlobalDecl(), RetTy: CGM.getContext().VoidTy, Fn: InitFunction,
                            FnInfo: CGM.getTypes().arrangeNullaryFunction(), Args: ArgList,
                            Loc, StartLoc: Loc);
      emitThreadPrivateVarInit(CGF&: InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    // Inside a function: register inline at the current insertion point.
    emitThreadPrivateVarInit(CGF&: *CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
1740
// For an active 'declare target ... indirect' function, create and register
// a device global holding the function's address so the runtime can resolve
// indirect calls to it.
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(VD: FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, BeginLoc: FD->getCanonicalDecl()->getBeginLoc(), ParentName: FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    // Device side: a constant pointer-sized global initialized with GV, in
    // the default globals address space.
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName: Name, Addr, VarSize: CGM.GetTargetTypeStoreSize(Ty: CGM.VoidPtrTy).getQuantity(),
      Flags: llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      Linkage: llvm::GlobalValue::WeakODRLinkage);
}
1775
// Return the address of an "artificial" threadprivate variable: internal
// per-thread state identified only by Name/VarType rather than a user
// VarDecl.
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName(Parts: {"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(T: VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      Ty: VarLVType, Name: Twine(Name).concat(Suffix).str());
  // Fast path: when TLS is available just make the internal global
  // thread_local and return it directly.
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(T: VarType));
  }
  // Otherwise go through __kmpc_threadprivate_cached with a dedicated
  // per-variable cache global.
  std::string CacheSuffix = getName(Parts: {"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: SourceLocation()),
      getThreadID(CGF, Loc: SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: GAddr, DestTy: CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: VarType), DestTy: CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          Ty: CGM.VoidPtrPtrTy,
          Name: Twine(Name).concat(Suffix).concat(Suffix: CacheSuffix).str())};
  // The runtime returns this thread's copy; cast it back to the variable's
  // LLVM type.
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.EmitRuntimeCall(
              callee: OMPBuilder.getOrCreateRuntimeFunction(
                  M&: CGM.getModule(), FnID: OMPRTL___kmpc_threadprivate_cached),
              args: Args),
          DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(T: VarType));
}
1808
1809void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
1810 const RegionCodeGenTy &ThenGen,
1811 const RegionCodeGenTy &ElseGen) {
1812 CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());
1813
1814 // If the condition constant folds and can be elided, try to avoid emitting
1815 // the condition and the dead arm of the if/else.
1816 bool CondConstant;
1817 if (CGF.ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) {
1818 if (CondConstant)
1819 ThenGen(CGF);
1820 else
1821 ElseGen(CGF);
1822 return;
1823 }
1824
1825 // Otherwise, the condition did not fold, or we couldn't elide it. Just
1826 // emit the conditional branch.
1827 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
1828 llvm::BasicBlock *ElseBlock = CGF.createBasicBlock(name: "omp_if.else");
1829 llvm::BasicBlock *ContBlock = CGF.createBasicBlock(name: "omp_if.end");
1830 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock: ThenBlock, FalseBlock: ElseBlock, /*TrueCount=*/0);
1831
1832 // Emit the 'then' code.
1833 CGF.EmitBlock(BB: ThenBlock);
1834 ThenGen(CGF);
1835 CGF.EmitBranch(Block: ContBlock);
1836 // Emit the 'else' code if present.
1837 // There is no need to emit line number for unconditional branch.
1838 (void)ApplyDebugLocation::CreateEmpty(CGF);
1839 CGF.EmitBlock(BB: ElseBlock);
1840 ElseGen(CGF);
1841 // There is no need to emit line number for unconditional branch.
1842 (void)ApplyDebugLocation::CreateEmpty(CGF);
1843 CGF.EmitBranch(Block: ContBlock);
1844 // Emit the continuation block for code after the if.
1845 CGF.EmitBlock(BB: ContBlock, /*IsFinished=*/true);
1846}
1847
// Emit a 'parallel' construct: __kmpc_fork_call with the outlined function
// (ThenGen), or — when an 'if' clause evaluates to false — a serialized
// execution of the outlined function bracketed by
// __kmpc_serialized_parallel/__kmpc_end_serialized_parallel (ElseGen).
// NOTE(review): NumThreads is unused in this body — presumably consumed by
// num_threads-clause handling elsewhere; confirm against callers.
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
        OutlinedFn};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
    RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_serialized_parallel),
                        args: Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(Ty: CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(/*C*/ 0), Addr: ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(Elt: ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(Elt: ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not a perfect but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(Kind: llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(Kind: llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, Args: OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M, FnID: OMPRTL___kmpc_end_serialized_parallel),
                        args: EndArgs);
  };
  // Without an 'if' clause the parallel path is emitted unconditionally.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
1918
1919// If we're inside an (outlined) parallel region, use the region info's
1920// thread-ID variable (it is passed in a first argument of the outlined function
1921// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
1922// regular serial code region, get thread ID by calling kmp_int32
1923// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
1924// return the address of that temp.
1925Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
1926 SourceLocation Loc) {
1927 if (auto *OMPRegionInfo =
1928 dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
1929 if (OMPRegionInfo->getThreadIDVariable())
1930 return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();
1931
1932 llvm::Value *ThreadID = getThreadID(CGF, Loc);
1933 QualType Int32Ty =
1934 CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
1935 Address ThreadIDTemp = CGF.CreateMemTemp(T: Int32Ty, /*Name*/ ".threadid_temp.");
1936 CGF.EmitStoreOfScalar(value: ThreadID,
1937 lvalue: CGF.MakeAddrLValue(Addr: ThreadIDTemp, T: Int32Ty));
1938
1939 return ThreadIDTemp;
1940}
1941
1942llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
1943 std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
1944 std::string Name = getName(Parts: {Prefix, "var"});
1945 return OMPBuilder.getOrCreateInternalVariable(Ty: KmpCriticalNameTy, Name);
1946}
1947
namespace {
/// Common pre(post)-action for different OpenMP constructs.
///
/// Enter() emits the "begin" runtime call; in conditional mode it also
/// branches on the call's result so the region body runs only when the call
/// returns non-zero. Exit() emits the matching "end" call. Done() closes the
/// conditional region and must only be called when Conditional is true
/// (ContBlock is null otherwise).
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;      // runtime call opening the region
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;       // runtime call closing the region
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;                      // guard body on Enter's result?
  llvm::BasicBlock *ContBlock = nullptr; // merge block; set in Enter() only
                                         // when Conditional is true

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(callee: EnterCallee, args: EnterArgs);
    if (Conditional) {
      // Branch into the body only when the runtime call returned non-zero.
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(Arg: EnterRes);
      auto *ThenBlock = CGF.createBasicBlock(name: "omp_if.then");
      ContBlock = CGF.createBasicBlock(name: "omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(Cond: CallBool, True: ThenBlock, False: ContBlock);
      CGF.EmitBlock(BB: ThenBlock);
    }
  }
  // Close the conditional region opened by Enter(); requires Conditional.
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(Block: ContBlock);
    CGF.EmitBlock(BB: ContBlock, IsFinished: true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(callee: ExitCallee, args: ExitArgs);
  }
};
} // anonymous namespace
1986
// Emit a 'critical' region bracketed by __kmpc_critical/__kmpc_end_critical,
// using __kmpc_critical_with_hint when a 'hint' clause expression is given.
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(arr&: Args),
                                                std::end(arr&: Args));
  if (Hint) {
    // The hint is passed as an extra u32 argument to the _with_hint entry.
    EnterArgs.push_back(Elt: CGF.Builder.CreateIntCast(
        V: CGF.EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(),
          FnID: Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_critical, CodeGen: CriticalOpGen);
}
2016
// Emit a 'master' region: the body runs only when __kmpc_master returns
// non-zero (conditional CommonActionTy), followed by __kmpc_end_master.
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_master, CodeGen: MasterOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2039
// Emit a 'masked' region: body runs only when __kmpc_masked(loc, gtid,
// filter) returns non-zero. A missing 'filter' clause defaults to 0 (the
// primary thread). Note __kmpc_end_masked takes no filter argument, hence
// the separate ArgsEnd array.
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(E: Filter, IgnoreResultAssign: CGF.Int32Ty)
                               : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_masked, CodeGen: MaskedOpGen);
  // Close the conditional region opened by the action's Enter().
  Action.Done(CGF);
}
2068
// Emit a 'taskyield' point, either through the OpenMPIRBuilder or a direct
// __kmpc_omp_taskyield(loc, gtid, 0) call. Afterwards, an enclosing untied
// task region needs a re-dispatch point (emitUntiedSwitch).
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(Loc: CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(Ty: CGM.IntTy, /*V=*/0, /*isSigned=*/IsSigned: true)};
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_taskyield),
                        args: Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
2088
// Emit a 'taskgroup' region bracketed by unconditional
// __kmpc_taskgroup/__kmpc_end_taskgroup runtime calls.
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_taskgroup, CodeGen: TaskgroupOpGen);
}
2108
2109/// Given an array of pointers to variables, project the address of a
2110/// given variable.
2111static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
2112 unsigned Index, const VarDecl *Var) {
2113 // Pull out the pointer to the variable.
2114 Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Addr: Array, Index);
2115 llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: PtrAddr);
2116
2117 llvm::Type *ElemTy = CGF.ConvertTypeForMem(T: Var->getType());
2118 return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(D: Var));
2119}
2120
/// Emit the helper function passed to __kmpc_copyprivate: it takes two
/// void*[n] arrays (destination and source variable addresses) and performs
/// each per-variable copy using the corresponding expression in
/// AssignmentOps.
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, Array: LHS, Index: I, Var: DestVar);

    const auto *SrcVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, Array: RHS, Index: I, Var: SrcVar);

    // The copy is the clause's assignment expression (may be a user-defined
    // operator= for class types).
    const auto *VD = cast<DeclRefExpr>(Val: CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(OriginalType: Type, DestAddr, SrcAddr, DestVD: DestVar, SrcVD: SrcVar, Copy: AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
2176
// Emit a 'single' region: the body runs only on the thread for which
// __kmpc_single returns non-zero; when 'copyprivate' variables are present,
// a did_it flag plus a generated copy function broadcast the single thread's
// values to the rest of the team via __kmpc_copyprivate.
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(T: KmpInt32Ty, Name: ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 0), Addr: DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, InnermostKind: OMPD_single, CodeGen: SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(Val: CGF.Builder.getInt32(C: 1), Addr: DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(T: CopyprivateArrayTy, Name: ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: CopyprivateList, Index: I);
      CGF.Builder.CreateStore(
          Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              V: CGF.EmitLValue(E: CopyprivateVars[I]).getPointer(CGF),
              DestTy: CGF.VoidPtrTy),
          Addr: Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, ArgsElemType: CGF.ConvertTypeForMem(T: CopyprivateArrayTy), CopyprivateVars,
        DestExprs: SrcExprs, SrcExprs: DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(Ty: CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: CopyprivateList, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(Addr: DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                            M&: CGM.getModule(), FnID: OMPRTL___kmpc_copyprivate),
                        args: Args);
  }
}
2263
2264void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
2265 const RegionCodeGenTy &OrderedOpGen,
2266 SourceLocation Loc, bool IsThreads) {
2267 if (!CGF.HaveInsertPoint())
2268 return;
2269 // __kmpc_ordered(ident_t *, gtid);
2270 // OrderedOpGen();
2271 // __kmpc_end_ordered(ident_t *, gtid);
2272 // Prepare arguments and build a call to __kmpc_ordered
2273 if (IsThreads) {
2274 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2275 CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
2276 M&: CGM.getModule(), FnID: OMPRTL___kmpc_ordered),
2277 Args,
2278 OMPBuilder.getOrCreateRuntimeFunction(
2279 M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_ordered),
2280 Args);
2281 OrderedOpGen.setAction(Action);
2282 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2283 return;
2284 }
2285 emitInlinedDirective(CGF, InnermostKind: OMPD_ordered, CodeGen: OrderedOpGen);
2286}
2287
2288unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
2289 unsigned Flags;
2290 if (Kind == OMPD_for)
2291 Flags = OMP_IDENT_BARRIER_IMPL_FOR;
2292 else if (Kind == OMPD_sections)
2293 Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
2294 else if (Kind == OMPD_single)
2295 Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
2296 else if (Kind == OMPD_barrier)
2297 Flags = OMP_IDENT_BARRIER_EXPL;
2298 else
2299 Flags = OMP_IDENT_BARRIER_IMPL;
2300 return Flags;
2301}
2302
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive (an
  // 'ordered' clause with a for-loop count parameter). In this case choose
  // static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    // Synthesize an unsigned 32-bit '1' literal to serve as the chunk
    // expression.
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
  // Otherwise ScheduleKind/ChunkExpr are left untouched, so the caller's
  // defaults remain in effect.
}
2320
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    // Delegate the whole barrier emission to the OpenMPIRBuilder.
    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                          EmitChecks));
    CGF.Builder.restoreIP(AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      // Inside a cancellable region, emit the cancellation-aware barrier; its
      // return value is nonzero when cancellation has been requested.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  // Non-cancellable (or forced-simple) case: plain barrier.
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
2372
2373void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
2374 Expr *ME, bool IsFatal) {
2375 llvm::Value *MVL =
2376 ME ? CGF.EmitStringLiteralLValue(E: cast<StringLiteral>(Val: ME)).getPointer(CGF)
2377 : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
2378 // Build call void __kmpc_error(ident_t *loc, int severity, const char
2379 // *message)
2380 llvm::Value *Args[] = {
2381 emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/EmitLoc: true),
2382 llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: IsFatal ? 2 : 1),
2383 CGF.Builder.CreatePointerCast(V: MVL, DestTy: CGM.Int8PtrTy)};
2384 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2385 M&: CGM.getModule(), FnID: OMPRTL___kmpc_error),
2386 args: Args);
2387}
2388
2389/// Map the OpenMP loop schedule to the runtime enumeration.
2390static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
2391 bool Chunked, bool Ordered) {
2392 switch (ScheduleKind) {
2393 case OMPC_SCHEDULE_static:
2394 return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
2395 : (Ordered ? OMP_ord_static : OMP_sch_static);
2396 case OMPC_SCHEDULE_dynamic:
2397 return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
2398 case OMPC_SCHEDULE_guided:
2399 return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
2400 case OMPC_SCHEDULE_runtime:
2401 return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
2402 case OMPC_SCHEDULE_auto:
2403 return Ordered ? OMP_ord_auto : OMP_sch_auto;
2404 case OMPC_SCHEDULE_unknown:
2405 assert(!Chunked && "chunk was specified but schedule kind not known");
2406 return Ordered ? OMP_ord_static : OMP_sch_static;
2407 }
2408 llvm_unreachable("Unexpected runtime schedule");
2409}
2410
2411/// Map the OpenMP distribute schedule to the runtime enumeration.
2412static OpenMPSchedType
2413getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
2414 // only static is allowed for dist_schedule
2415 return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
2416}
2417
2418bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
2419 bool Chunked) const {
2420 OpenMPSchedType Schedule =
2421 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2422 return Schedule == OMP_sch_static;
2423}
2424
2425bool CGOpenMPRuntime::isStaticNonchunked(
2426 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2427 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2428 return Schedule == OMP_dist_sch_static;
2429}
2430
2431bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
2432 bool Chunked) const {
2433 OpenMPSchedType Schedule =
2434 getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
2435 return Schedule == OMP_sch_static_chunked;
2436}
2437
2438bool CGOpenMPRuntime::isStaticChunked(
2439 OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
2440 OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
2441 return Schedule == OMP_dist_sch_static_chunked;
2442}
2443
2444bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
2445 OpenMPSchedType Schedule =
2446 getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
2447 assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
2448 return Schedule != OMP_sch_static;
2449}
2450
2451static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
2452 OpenMPScheduleClauseModifier M1,
2453 OpenMPScheduleClauseModifier M2) {
2454 int Modifier = 0;
2455 switch (M1) {
2456 case OMPC_SCHEDULE_MODIFIER_monotonic:
2457 Modifier = OMP_sch_modifier_monotonic;
2458 break;
2459 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2460 Modifier = OMP_sch_modifier_nonmonotonic;
2461 break;
2462 case OMPC_SCHEDULE_MODIFIER_simd:
2463 if (Schedule == OMP_sch_static_chunked)
2464 Schedule = OMP_sch_static_balanced_chunked;
2465 break;
2466 case OMPC_SCHEDULE_MODIFIER_last:
2467 case OMPC_SCHEDULE_MODIFIER_unknown:
2468 break;
2469 }
2470 switch (M2) {
2471 case OMPC_SCHEDULE_MODIFIER_monotonic:
2472 Modifier = OMP_sch_modifier_monotonic;
2473 break;
2474 case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
2475 Modifier = OMP_sch_modifier_nonmonotonic;
2476 break;
2477 case OMPC_SCHEDULE_MODIFIER_simd:
2478 if (Schedule == OMP_sch_static_chunked)
2479 Schedule = OMP_sch_static_balanced_chunked;
2480 break;
2481 case OMPC_SCHEDULE_MODIFIER_last:
2482 case OMPC_SCHEDULE_MODIFIER_unknown:
2483 break;
2484 }
2485 // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Desription.
2486 // If the static schedule kind is specified or if the ordered clause is
2487 // specified, and if the nonmonotonic modifier is not specified, the effect is
2488 // as if the monotonic modifier is specified. Otherwise, unless the monotonic
2489 // modifier is specified, the effect is as if the nonmonotonic modifier is
2490 // specified.
2491 if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
2492 if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
2493 Schedule == OMP_sch_static_balanced_chunked ||
2494 Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
2495 Schedule == OMP_dist_sch_static_chunked ||
2496 Schedule == OMP_dist_sch_static))
2497 Modifier = OMP_sch_modifier_nonmonotonic;
2498 }
2499 return Schedule | Modifier;
2500}
2501
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  // Translate the clause schedule (plus orderedness and chunk presence) into
  // the runtime schedule constant.
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  // Unordered static schedules are not lowered through dispatch_init.
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
2535
2536void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
2537 SourceLocation Loc) {
2538 if (!CGF.HaveInsertPoint())
2539 return;
2540 // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
2541 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2542 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchDeinitFunction(), args: Args);
2543}
2544
/// Emit the actual call to a __kmpc_for_static_init entry point.
/// \p Schedule must be one of the static schedule variants (asserted below);
/// loops with an 'ordered' clause are not lowered here.
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    // Only non-chunked schedules may omit the chunk value.
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
2593
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  // Tag the ident_t with the kind of worksharing construct (loop vs sections).
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             /*IsGPUDistribute=*/false);
  // Emit the runtime call under an artificial debug location for Loc.
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
2615
2616void CGOpenMPRuntime::emitDistributeStaticInit(
2617 CodeGenFunction &CGF, SourceLocation Loc,
2618 OpenMPDistScheduleClauseKind SchedKind,
2619 const CGOpenMPRuntime::StaticRTInput &Values) {
2620 OpenMPSchedType ScheduleNum =
2621 getRuntimeSchedule(ScheduleKind: SchedKind, Chunked: Values.Chunk != nullptr);
2622 llvm::Value *UpdatedLocation =
2623 emitUpdateLocation(CGF, Loc, Flags: OMP_IDENT_WORK_DISTRIBUTE);
2624 llvm::Value *ThreadId = getThreadID(CGF, Loc);
2625 llvm::FunctionCallee StaticInitFunction;
2626 bool isGPUDistribute =
2627 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU();
2628 StaticInitFunction = OMPBuilder.createForStaticInitFunction(
2629 IVSize: Values.IVSize, IVSigned: Values.IVSigned, IsGPUDistribute: isGPUDistribute);
2630
2631 emitForStaticInitCall(CGF, UpdateLocation: UpdatedLocation, ThreadId, ForStaticInitFunction: StaticInitFunction,
2632 Schedule: ScheduleNum, M1: OMPC_SCHEDULE_MODIFIER_unknown,
2633 M2: OMPC_SCHEDULE_MODIFIER_unknown, Values);
2634}
2635
2636void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
2637 SourceLocation Loc,
2638 OpenMPDirectiveKind DKind) {
2639 assert((DKind == OMPD_distribute || DKind == OMPD_for ||
2640 DKind == OMPD_sections) &&
2641 "Expected distribute, for, or sections directive kind");
2642 if (!CGF.HaveInsertPoint())
2643 return;
2644 // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
2645 llvm::Value *Args[] = {
2646 emitUpdateLocation(CGF, Loc,
2647 Flags: isOpenMPDistributeDirective(DKind) ||
2648 (DKind == OMPD_target_teams_loop)
2649 ? OMP_IDENT_WORK_DISTRIBUTE
2650 : isOpenMPLoopDirective(DKind)
2651 ? OMP_IDENT_WORK_LOOP
2652 : OMP_IDENT_WORK_SECTIONS),
2653 getThreadID(CGF, Loc)};
2654 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
2655 if (isOpenMPDistributeDirective(DKind) &&
2656 CGM.getLangOpts().OpenMPIsTargetDevice && CGM.getTriple().isGPU())
2657 CGF.EmitRuntimeCall(
2658 callee: OMPBuilder.getOrCreateRuntimeFunction(
2659 M&: CGM.getModule(), FnID: OMPRTL___kmpc_distribute_static_fini),
2660 args: Args);
2661 else
2662 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2663 M&: CGM.getModule(), FnID: OMPRTL___kmpc_for_static_fini),
2664 args: Args);
2665}
2666
2667void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
2668 SourceLocation Loc,
2669 unsigned IVSize,
2670 bool IVSigned) {
2671 if (!CGF.HaveInsertPoint())
2672 return;
2673 // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
2674 llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
2675 CGF.EmitRuntimeCall(callee: OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
2676 args: Args);
2677}
2678
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  // The runtime returns a kmp_int32; hand the caller an i1 instead.
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
2701
2702void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
2703 llvm::Value *NumThreads,
2704 SourceLocation Loc) {
2705 if (!CGF.HaveInsertPoint())
2706 return;
2707 // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
2708 llvm::Value *Args[] = {
2709 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2710 CGF.Builder.CreateIntCast(V: NumThreads, DestTy: CGF.Int32Ty, /*isSigned*/ true)};
2711 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2712 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_threads),
2713 args: Args);
2714}
2715
2716void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
2717 ProcBindKind ProcBind,
2718 SourceLocation Loc) {
2719 if (!CGF.HaveInsertPoint())
2720 return;
2721 assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
2722 // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
2723 llvm::Value *Args[] = {
2724 emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
2725 llvm::ConstantInt::get(Ty: CGM.IntTy, V: unsigned(ProcBind), /*isSigned=*/IsSigned: true)};
2726 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2727 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_proc_bind),
2728 args: Args);
2729}
2730
2731void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
2732 SourceLocation Loc, llvm::AtomicOrdering AO) {
2733 if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
2734 OMPBuilder.createFlush(Loc: CGF.Builder);
2735 } else {
2736 if (!CGF.HaveInsertPoint())
2737 return;
2738 // Build call void __kmpc_flush(ident_t *loc)
2739 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
2740 M&: CGM.getModule(), FnID: OMPRTL___kmpc_flush),
2741 args: emitUpdateLocation(CGF, Loc));
2742 }
2743}
2744
namespace {
/// Indexes of fields for type kmp_task_t (must stay in sync with the layout
/// built by createKmpTaskTRecordDecl below).
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// First kmp_cmplrdata_t field: function with call of destructors for
  /// private variables.
  Data1,
  /// Second kmp_cmplrdata_t field: task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
2770
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  // Diagnostic callback invoked by the OMPBuilder when an offload entry is
  // malformed. It tries to recover a source location by matching the entry's
  // device/file unique ID against the files known to the SourceManager; for
  // GLOBAL_VAR_LINK errors the diagnostic is emitted without a location.
  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
2820
2821void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
2822 if (!KmpRoutineEntryPtrTy) {
2823 // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
2824 ASTContext &C = CGM.getContext();
2825 QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
2826 FunctionProtoType::ExtProtoInfo EPI;
2827 KmpRoutineEntryPtrQTy = C.getPointerType(
2828 T: C.getFunctionType(ResultTy: KmpInt32Ty, Args: KmpRoutineEntryTyArgs, EPI));
2829 KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(T: KmpRoutineEntryPtrQTy);
2830 }
2831}
2832
namespace {
/// Bundles the AST nodes describing one privatized variable of a task: the
/// referencing expression and original variable, plus the generated private
/// copy and its element initializer variable, when present.
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  /// Constructor for a plain local variable: only the original declaration is
  /// tracked; all other members stay null.
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  /// True iff this entry was built with the VarDecl-only constructor, i.e. it
  /// has no clause expression and no generated copy/initializer.
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
/// Required alignment paired with the private variable description.
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
2850
2851static bool isAllocatableDecl(const VarDecl *VD) {
2852 const VarDecl *CVD = VD->getCanonicalDecl();
2853 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
2854 return false;
2855 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
2856 // Use the default allocation.
2857 return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
2858 !AA->getAllocator());
2859}
2860
2861static RecordDecl *
2862createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
2863 if (!Privates.empty()) {
2864 ASTContext &C = CGM.getContext();
2865 // Build struct .kmp_privates_t. {
2866 // /* private vars */
2867 // };
2868 RecordDecl *RD = C.buildImplicitRecord(Name: ".kmp_privates.t");
2869 RD->startDefinition();
2870 for (const auto &Pair : Privates) {
2871 const VarDecl *VD = Pair.second.Original;
2872 QualType Type = VD->getType().getNonReferenceType();
2873 // If the private variable is a local variable with lvalue ref type,
2874 // allocate the pointer instead of the pointee type.
2875 if (Pair.second.isLocalPrivate()) {
2876 if (VD->getType()->isLValueReferenceType())
2877 Type = C.getPointerType(T: Type);
2878 if (isAllocatableDecl(VD))
2879 Type = C.getPointerType(T: Type);
2880 }
2881 FieldDecl *FD = addFieldToRecordDecl(C, DC: RD, FieldTy: Type);
2882 if (VD->hasAttrs()) {
2883 for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
2884 E(VD->getAttrs().end());
2885 I != E; ++I)
2886 FD->addAttr(A: *I);
2887 }
2888 }
2889 RD->completeDefinition();
2890 return RD;
2891 }
2892 return nullptr;
2893}
2894
/// Build the implicit record types kmp_cmplrdata_t and kmp_task_t. The field
/// order must match the KmpTaskTFields enum above.
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  // kmp_cmplrdata_t is a union of a kmp_int32 and a routine pointer.
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);              // shareds
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy); // routine
  addFieldToRecordDecl(C, RD, KmpInt32Ty);               // part_id
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);           // data1
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);           // data2
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty); // lb
    addFieldToRecordDecl(C, RD, KmpUInt64Ty); // ub
    addFieldToRecordDecl(C, RD, KmpInt64Ty);  // st
    addFieldToRecordDecl(C, RD, KmpInt32Ty);  // liter
    addFieldToRecordDecl(C, RD, C.VoidPtrTy); // reductions
  }
  RD->completeDefinition();
  return RD;
}
2940
2941static RecordDecl *
2942createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
2943 ArrayRef<PrivateDataTy> Privates) {
2944 ASTContext &C = CGM.getContext();
2945 // Build struct kmp_task_t_with_privates {
2946 // kmp_task_t task_data;
2947 // .kmp_privates_t. privates;
2948 // };
2949 RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_task_t_with_privates");
2950 RD->startDefinition();
2951 addFieldToRecordDecl(C, DC: RD, FieldTy: KmpTaskTQTy);
2952 if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
2953 addFieldToRecordDecl(C, DC: RD, FieldTy: C.getRecordType(Decl: PrivateRD));
2954 RD->completeDefinition();
2955 return RD;
2956}
2957
2958/// Emit a proxy function which accepts kmp_task_t as the second
2959/// argument.
2960/// \code
2961/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
2962/// TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
2963/// For taskloops:
2964/// tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
2965/// tt->reductions, tt->shareds);
2966/// return 0;
2967/// }
2968/// \endcode
2969static llvm::Function *
2970emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
2971 OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
2972 QualType KmpTaskTWithPrivatesPtrQTy,
2973 QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
2974 QualType SharedsPtrTy, llvm::Function *TaskFunction,
2975 llvm::Value *TaskPrivatesMap) {
2976 ASTContext &C = CGM.getContext();
2977 FunctionArgList Args;
2978 ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
2979 ImplicitParamKind::Other);
2980 ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
2981 KmpTaskTWithPrivatesPtrQTy.withRestrict(),
2982 ImplicitParamKind::Other);
2983 Args.push_back(Elt: &GtidArg);
2984 Args.push_back(Elt: &TaskTypeArg);
2985 const auto &TaskEntryFnInfo =
2986 CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
2987 llvm::FunctionType *TaskEntryTy =
2988 CGM.getTypes().GetFunctionType(Info: TaskEntryFnInfo);
2989 std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_entry", ""});
2990 auto *TaskEntry = llvm::Function::Create(
2991 Ty: TaskEntryTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
2992 CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskEntry, FI: TaskEntryFnInfo);
2993 TaskEntry->setDoesNotRecurse();
2994 CodeGenFunction CGF(CGM);
2995 CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: TaskEntry, FnInfo: TaskEntryFnInfo, Args,
2996 Loc, StartLoc: Loc);
2997
2998 // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
2999 // tt,
3000 // For taskloops:
3001 // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
3002 // tt->task_data.shareds);
3003 llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
3004 Addr: CGF.GetAddrOfLocalVar(VD: &GtidArg), /*Volatile=*/false, Ty: KmpInt32Ty, Loc);
3005 LValue TDBase = CGF.EmitLoadOfPointerLValue(
3006 Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
3007 PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
3008 const auto *KmpTaskTWithPrivatesQTyRD =
3009 cast<RecordDecl>(Val: KmpTaskTWithPrivatesQTy->getAsTagDecl());
3010 LValue Base =
3011 CGF.EmitLValueForField(Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
3012 const auto *KmpTaskTQTyRD = cast<RecordDecl>(Val: KmpTaskTQTy->getAsTagDecl());
3013 auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
3014 LValue PartIdLVal = CGF.EmitLValueForField(Base, Field: *PartIdFI);
3015 llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);
3016
3017 auto SharedsFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds);
3018 LValue SharedsLVal = CGF.EmitLValueForField(Base, Field: *SharedsFI);
3019 llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3020 V: CGF.EmitLoadOfScalar(lvalue: SharedsLVal, Loc),
3021 DestTy: CGF.ConvertTypeForMem(T: SharedsPtrTy));
3022
3023 auto PrivatesFI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin(), n: 1);
3024 llvm::Value *PrivatesParam;
3025 if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
3026 LValue PrivatesLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PrivatesFI);
3027 PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3028 V: PrivatesLVal.getPointer(CGF), DestTy: CGF.VoidPtrTy);
3029 } else {
3030 PrivatesParam = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
3031 }
3032
3033 llvm::Value *CommonArgs[] = {
3034 GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
3035 CGF.Builder
3036 .CreatePointerBitCastOrAddrSpaceCast(Addr: TDBase.getAddress(),
3037 Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty)
3038 .emitRawPointer(CGF)};
3039 SmallVector<llvm::Value *, 16> CallArgs(std::begin(arr&: CommonArgs),
3040 std::end(arr&: CommonArgs));
3041 if (isOpenMPTaskLoopDirective(DKind: Kind)) {
3042 auto LBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound);
3043 LValue LBLVal = CGF.EmitLValueForField(Base, Field: *LBFI);
3044 llvm::Value *LBParam = CGF.EmitLoadOfScalar(lvalue: LBLVal, Loc);
3045 auto UBFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound);
3046 LValue UBLVal = CGF.EmitLValueForField(Base, Field: *UBFI);
3047 llvm::Value *UBParam = CGF.EmitLoadOfScalar(lvalue: UBLVal, Loc);
3048 auto StFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride);
3049 LValue StLVal = CGF.EmitLValueForField(Base, Field: *StFI);
3050 llvm::Value *StParam = CGF.EmitLoadOfScalar(lvalue: StLVal, Loc);
3051 auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
3052 LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
3053 llvm::Value *LIParam = CGF.EmitLoadOfScalar(lvalue: LILVal, Loc);
3054 auto RFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions);
3055 LValue RLVal = CGF.EmitLValueForField(Base, Field: *RFI);
3056 llvm::Value *RParam = CGF.EmitLoadOfScalar(lvalue: RLVal, Loc);
3057 CallArgs.push_back(Elt: LBParam);
3058 CallArgs.push_back(Elt: UBParam);
3059 CallArgs.push_back(Elt: StParam);
3060 CallArgs.push_back(Elt: LIParam);
3061 CallArgs.push_back(Elt: RParam);
3062 }
3063 CallArgs.push_back(Elt: SharedsParam);
3064
3065 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskFunction,
3066 Args: CallArgs);
3067 CGF.EmitStoreThroughLValue(Src: RValue::get(V: CGF.Builder.getInt32(/*C=*/0)),
3068 Dst: CGF.MakeAddrLValue(Addr: CGF.ReturnValue, T: KmpInt32Ty));
3069 CGF.FinishFunction();
3070 return TaskEntry;
3071}
3072
/// Emit the destructor thunk for a task:
/// \code
/// kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
///                                 kmp_task_t_with_privates *tt) {
///   // run the destructor of every privates member that needs one
/// }
/// \endcode
/// Generated only when some private copy has a non-trivially-destructible
/// type (see checkDestructorsRequired), in which case the task is allocated
/// with the DestructorsFlag set so the runtime calls this thunk.
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Same parameter list as the task entry point: (gtid, task record ptr).
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &GtidArg);
  Args.push_back(Elt: &TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: KmpInt32Ty, args: Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(Info: DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(Ty: DestructorFnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                             N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: DestructorFn,
                                    FI: DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: KmpInt32Ty, Fn: DestructorFn, FnInfo: DestructorFnInfo,
                    Args, Loc, StartLoc: Loc);

  // Load *tt and step to its second field, the generated privates struct.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskTypeArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(Val: KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, Field: *FI);
  // Register a destroy cleanup for each field whose type requires
  // destruction; the cleanups run when the function finishes.
  for (const auto *Field :
       cast<RecordDecl>(Val: FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(dtorKind: DtorKind, addr: FieldLValue.getAddress(), type: Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
3121
3122/// Emit a privates mapping function for correct handling of private and
3123/// firstprivate variables.
3124/// \code
3125/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
3126/// **noalias priv1,..., <tyn> **noalias privn) {
3127/// *priv1 = &.privates.priv1;
3128/// ...;
3129/// *privn = &.privates.privn;
3130/// }
3131/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Arg 0: const .privates. *noalias — the privates record inside the task.
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(T: PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(Elt: &TaskPrivatesArg);
  // Record which argument position carries the out-pointer for each original
  // variable; Counter starts at 1 because position 0 is TaskPrivatesArg.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  // One <ty> **noalias out-parameter per 'private' variable.
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(Elt: ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
        T: C.getPointerType(T: C.getPointerType(T: E->getType()))
            .withConst()
            .withRestrict(),
        ParamKind: ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Likewise for 'firstprivate' variables.
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(Elt: ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
        T: C.getPointerType(T: C.getPointerType(T: E->getType()))
            .withConst()
            .withRestrict(),
        ParamKind: ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Likewise for 'lastprivate' variables.
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(Elt: ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
        T: C.getPointerType(T: C.getPointerType(T: E->getType()))
            .withConst()
            .withRestrict(),
        ParamKind: ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  // Privatized locals: references and allocatable decls get one extra level
  // of indirection each.
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(T: Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(T: Ty);
    Args.push_back(Elt: ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, IdLoc: Loc, /*Id=*/nullptr,
        T: C.getPointerType(T: C.getPointerType(T: Ty)).withConst().withRestrict(),
        ParamKind: ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(Info: TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName(Parts: {"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      Ty: TaskPrivatesMapTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
      M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskPrivatesMap,
                                    FI: TaskPrivatesMapFnInfo);
  // The mapping function only stores addresses; allow it to be inlined when
  // optimizing (strip the O0 attributes StartFunction may have applied).
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(Kind: llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(Kind: llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskPrivatesMap,
                    FnInfo: TaskPrivatesMapFnInfo, Args, Loc, StartLoc: Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &TaskPrivatesArg),
      PtrTy: TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(Val: PrivatesQTy->getAsTagDecl());
  Counter = 0;
  // The Counter-th record field pairs with Privates[Counter]; PrivateVarsPos
  // then maps its original variable back to the out-parameter to store into.
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD), T: VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        Ptr: RefLVal.getAddress(), PtrTy: RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(value: FieldLVal.getPointer(CGF), lvalue: RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
3230
/// Emit initialization for private variables in task-based directives.
/// \param KmpTaskSharedsPtr Address of the captured shareds block of the
/// source task (may be invalid when nothing has to be copied from it).
/// \param TDBase LValue of the kmp_task_t_with_privates task record.
/// \param ForDup true when emitting the body of the task duplication
/// function (taskloop), false during initial task creation.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  // Second field of the task record is the generated privates struct.
  auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(DKind: D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(DKind: D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr: KmpTaskSharedsPtr, Ty: CGF.ConvertTypeForMem(T: SharedsPtrTy),
            ElementTy: CGF.ConvertTypeForMem(T: SharedsTy)),
        T: SharedsTy);
  }
  // Walk the fields of the privates struct in lockstep with Privates; both
  // were built in the same order.
  FI = cast<RecordDecl>(Val: FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    // When duplicating, only non-trivial constructor calls are re-run here;
    // everything else was already handled at initial creation.
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Val: Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(Base: PrivatesBase, Field: *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        // Firstprivate: copy/construct from the original (shared) value.
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(VD: OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: OriginalVD), T: Type);
        } else if (ForDup) {
          // Source value lives in the shareds block of the source task; fix
          // the alignment to match the original declaration.
          SharedRefLValue = CGF.EmitLValueForField(Base: SrcBase, Field: SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              Addr: SharedRefLValue.getAddress().withAlignment(
                  NewAlignment: C.getDeclAlign(D: OriginalVD)),
              T: SharedRefLValue.getType(), BaseInfo: LValueBaseInfo(AlignmentSource::Decl),
              TBAAInfo: SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Val: Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(Val: CGF.CurCodeDecl)) {
          // Lambda/block captures can be emitted directly in this context.
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(E: Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Val: Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(Dest: PrivateLValue, Src: SharedRefLValue, EltTy: Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                DestAddr: PrivateLValue.getAddress(), SrcAddr: SharedRefLValue.getAddress(), OriginalType: Type,
                CopyGen: [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                 Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(LocalVD: Elem, Addr: SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(E: Init, Location: DestElement,
                                       Quals: Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          // Non-array firstprivate: bind the source value under Elem and run
          // the initializer expression.
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(LocalVD: Elem, Addr: SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        // Plain private/lastprivate: just run the copy's initializer.
        CGF.EmitExprAsInit(init: Init, D: VD, lvalue: PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
3349
3350/// Check if duplication function is required for taskloops.
3351static bool checkInitIsRequired(CodeGenFunction &CGF,
3352 ArrayRef<PrivateDataTy> Privates) {
3353 bool InitRequired = false;
3354 for (const PrivateDataTy &Pair : Privates) {
3355 if (Pair.second.isLocalPrivate())
3356 continue;
3357 const VarDecl *VD = Pair.second.PrivateCopy;
3358 const Expr *Init = VD->getAnyInitializer();
3359 InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Val: Init) &&
3360 !CGF.isTrivialInitializer(Init));
3361 if (InitRequired)
3362 break;
3363 }
3364 return InitRequired;
3365}
3366
3367
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
/// task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  // Parameters: destination task, source task, lastprivate flag.
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(Elt: &DstArg);
  Args.push_back(Elt: &SrcArg);
  Args.push_back(Elt: &LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(Info: TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      Ty: TaskDupTy, Linkage: llvm::GlobalValue::InternalLinkage, N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: TaskDup, FI: TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn: TaskDup, FnInfo: TaskDupFnInfo, Args, Loc,
                    StartLoc: Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      Ptr: CGF.GetAddrOfLocalVar(VD: &DstArg),
      PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, Field: *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        Addr: CGF.GetAddrOfLocalVar(VD: &LastprivArg), /*Volatile=*/false, Ty: C.IntTy, Loc);
    CGF.EmitStoreOfScalar(value: Lastpriv, lvalue: LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  // If there are firstprivates, locate the shareds block of the *source* task
  // so emitPrivatesInit can copy their values into the destination.
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        Ptr: CGF.GetAddrOfLocalVar(VD: &SrcArg),
        PtrTy: KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        Base: TDBase, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValueForField(
                                 Base, Field: *std::next(x: KmpTaskTQTyRD->field_begin(),
                                                   n: KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
3446
3447/// Checks if destructor function is required to be generated.
3448/// \return true if cleanups are required, false otherwise.
3449static bool
3450checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
3451 ArrayRef<PrivateDataTy> Privates) {
3452 for (const PrivateDataTy &P : Privates) {
3453 if (P.second.isLocalPrivate())
3454 continue;
3455 QualType Ty = P.second.Original->getType().getNonReferenceType();
3456 if (Ty.isDestructedType())
3457 return true;
3458 }
3459 return false;
3460}
3461
namespace {
/// Loop generator for OpenMP iterator expression.
/// The constructor privatizes the iterator/counter variables and emits the
/// opening half of one loop per iterator; the destructor emits the counter
/// increments, back-branches and exit blocks. Code emitted while the scope is
/// alive therefore executes inside the generated loop nest.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  // Per-iterator continue/exit targets, filled by the constructor and closed
  // in reverse order by the destructor.
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    // A null iterator expression makes this scope a no-op.
    if (!E)
      return;
    // Evaluate the upper bounds first, then create private storage for every
    // iterator variable and its helper counter.
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(Elt: CGF.EmitScalarExpr(E: E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(Val: E->getIteratorDecl(I));
      addPrivate(LocalVD: VD, Addr: CGF.CreateMemTemp(T: VD->getType(), Name: VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          LocalVD: HelperData.CounterVD,
          Addr: CGF.CreateMemTemp(T: HelperData.CounterVD->getType(), Name: "counter.addr"));
    }
    Privatize();

    // Open one counter-based loop per iterator, outermost first.
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(Addr: CGF.GetAddrOfLocalVar(VD: HelperData.CounterVD),
                             T: HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CLVal.getAddress().getElementType(), V: 0),
          lvalue: CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(Args: CGF.getJumpDestInCurrentScope(Name: "iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(BB: ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(lvalue: CLVal, Loc: HelperData.CounterVD->getLocation());
      // Choose a signed or unsigned compare to match the counter's type.
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(LHS: CVal, RHS: N)
              : CGF.Builder.CreateICmpULT(LHS: CVal, RHS: N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "iter.body");
      CGF.Builder.CreateCondBr(Cond: Cmp, True: BodyBB, False: ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BB: BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(E: HelperData.Update);
    }
  }
  // Close the loop nest from innermost to outermost iterator.
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I: I - 1);
      CGF.EmitIgnoredExpr(E: HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(Dest: ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(BB: ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
3537
/// Return the base address of the data referenced by \p E together with its
/// size in bytes. Array-shaping expressions multiply the pointee size by all
/// dimensions, array sections compute (one-past-upper - lower), and any other
/// expression uses the size of its own type.
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(Val: E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(E: Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    // Size = sizeof(pointee) * dim0 * dim1 * ... (all in size_t).
    SizeVal = CGF.getTypeSize(Ty: OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
      Sz = CGF.EmitScalarConversion(
          Src: Sz, SrcTy: SE->getType(), DstTy: CGF.getContext().getSizeType(), Loc: SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(LHS: SizeVal, RHS: Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenImpCasts())) {
    // Size = address one past the section's upper bound minus the section's
    // base address, computed as integers.
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(E: ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        Ty: UpAddrAddress.getElementType(), Ptr: UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(V: UpAddr, DestTy: CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(LHS: UpIntPtr, RHS: LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(x&: Addr, y&: SizeVal);
}
3573
/// Builds the kmp_task_affinity_info_t type, if it is not built yet, and
/// builds the flags type. (The previous comment mentioning kmp_depend_info
/// was a copy-paste error.)
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  // Flags are a 32-bit unsigned integer.
  QualType FlagsTy = C.getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    // Record layout: { intptr_t base address; size_t length; uint32 flags; }.
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord(Name: "kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getIntPtrType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: C.getSizeType());
    addFieldToRecordDecl(C, DC: KmpAffinityInfoRD, FieldTy: FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(Decl: KmpAffinityInfoRD);
  }
}
3588
3589CGOpenMPRuntime::TaskResultTy
3590CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
3591 const OMPExecutableDirective &D,
3592 llvm::Function *TaskFunction, QualType SharedsTy,
3593 Address Shareds, const OMPTaskDataTy &Data) {
3594 ASTContext &C = CGM.getContext();
3595 llvm::SmallVector<PrivateDataTy, 4> Privates;
3596 // Aggregate privates and sort them by the alignment.
3597 const auto *I = Data.PrivateCopies.begin();
3598 for (const Expr *E : Data.PrivateVars) {
3599 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3600 Privates.emplace_back(
3601 Args: C.getDeclAlign(D: VD),
3602 Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3603 /*PrivateElemInit=*/nullptr));
3604 ++I;
3605 }
3606 I = Data.FirstprivateCopies.begin();
3607 const auto *IElemInitRef = Data.FirstprivateInits.begin();
3608 for (const Expr *E : Data.FirstprivateVars) {
3609 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3610 Privates.emplace_back(
3611 Args: C.getDeclAlign(D: VD),
3612 Args: PrivateHelpersTy(
3613 E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3614 cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IElemInitRef)->getDecl())));
3615 ++I;
3616 ++IElemInitRef;
3617 }
3618 I = Data.LastprivateCopies.begin();
3619 for (const Expr *E : Data.LastprivateVars) {
3620 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
3621 Privates.emplace_back(
3622 Args: C.getDeclAlign(D: VD),
3623 Args: PrivateHelpersTy(E, VD, cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()),
3624 /*PrivateElemInit=*/nullptr));
3625 ++I;
3626 }
3627 for (const VarDecl *VD : Data.PrivateLocals) {
3628 if (isAllocatableDecl(VD))
3629 Privates.emplace_back(Args: CGM.getPointerAlign(), Args: PrivateHelpersTy(VD));
3630 else
3631 Privates.emplace_back(Args: C.getDeclAlign(D: VD), Args: PrivateHelpersTy(VD));
3632 }
3633 llvm::stable_sort(Range&: Privates,
3634 C: [](const PrivateDataTy &L, const PrivateDataTy &R) {
3635 return L.first > R.first;
3636 });
3637 QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
3638 // Build type kmp_routine_entry_t (if not built yet).
3639 emitKmpRoutineEntryT(KmpInt32Ty);
3640 // Build type kmp_task_t (if not built yet).
3641 if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind())) {
3642 if (SavedKmpTaskloopTQTy.isNull()) {
3643 SavedKmpTaskloopTQTy = C.getRecordType(Decl: createKmpTaskTRecordDecl(
3644 CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
3645 }
3646 KmpTaskTQTy = SavedKmpTaskloopTQTy;
3647 } else {
3648 assert((D.getDirectiveKind() == OMPD_task ||
3649 isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
3650 isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
3651 "Expected taskloop, task or target directive");
3652 if (SavedKmpTaskTQTy.isNull()) {
3653 SavedKmpTaskTQTy = C.getRecordType(Decl: createKmpTaskTRecordDecl(
3654 CGM, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPointerQTy: KmpRoutineEntryPtrQTy));
3655 }
3656 KmpTaskTQTy = SavedKmpTaskTQTy;
3657 }
3658 const auto *KmpTaskTQTyRD = cast<RecordDecl>(Val: KmpTaskTQTy->getAsTagDecl());
3659 // Build particular struct kmp_task_t for the given task.
3660 const RecordDecl *KmpTaskTWithPrivatesQTyRD =
3661 createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
3662 QualType KmpTaskTWithPrivatesQTy = C.getRecordType(Decl: KmpTaskTWithPrivatesQTyRD);
3663 QualType KmpTaskTWithPrivatesPtrQTy =
3664 C.getPointerType(T: KmpTaskTWithPrivatesQTy);
3665 llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(AddrSpace: 0);
3666 llvm::Value *KmpTaskTWithPrivatesTySize =
3667 CGF.getTypeSize(Ty: KmpTaskTWithPrivatesQTy);
3668 QualType SharedsPtrTy = C.getPointerType(T: SharedsTy);
3669
3670 // Emit initial values for private copies (if any).
3671 llvm::Value *TaskPrivatesMap = nullptr;
3672 llvm::Type *TaskPrivatesMapTy =
3673 std::next(x: TaskFunction->arg_begin(), n: 3)->getType();
3674 if (!Privates.empty()) {
3675 auto FI = std::next(x: KmpTaskTWithPrivatesQTyRD->field_begin());
3676 TaskPrivatesMap =
3677 emitTaskPrivateMappingFunction(CGM, Loc, Data, PrivatesQTy: FI->getType(), Privates);
3678 TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3679 V: TaskPrivatesMap, DestTy: TaskPrivatesMapTy);
3680 } else {
3681 TaskPrivatesMap = llvm::ConstantPointerNull::get(
3682 T: cast<llvm::PointerType>(Val: TaskPrivatesMapTy));
3683 }
3684 // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
3685 // kmp_task_t *tt);
3686 llvm::Function *TaskEntry = emitProxyTaskFunction(
3687 CGM, Loc, Kind: D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3688 KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
3689 TaskPrivatesMap);
3690
3691 // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
3692 // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
3693 // kmp_routine_entry_t *task_entry);
3694 // Task flags. Format is taken from
3695 // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
3696 // description of kmp_tasking_flags struct.
3697 enum {
3698 TiedFlag = 0x1,
3699 FinalFlag = 0x2,
3700 DestructorsFlag = 0x8,
3701 PriorityFlag = 0x20,
3702 DetachableFlag = 0x40,
3703 };
3704 unsigned Flags = Data.Tied ? TiedFlag : 0;
3705 bool NeedsCleanup = false;
3706 if (!Privates.empty()) {
3707 NeedsCleanup =
3708 checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
3709 if (NeedsCleanup)
3710 Flags = Flags | DestructorsFlag;
3711 }
3712 if (Data.Priority.getInt())
3713 Flags = Flags | PriorityFlag;
3714 if (D.hasClausesOfKind<OMPDetachClause>())
3715 Flags = Flags | DetachableFlag;
3716 llvm::Value *TaskFlags =
3717 Data.Final.getPointer()
3718 ? CGF.Builder.CreateSelect(C: Data.Final.getPointer(),
3719 True: CGF.Builder.getInt32(C: FinalFlag),
3720 False: CGF.Builder.getInt32(/*C=*/0))
3721 : CGF.Builder.getInt32(C: Data.Final.getInt() ? FinalFlag : 0);
3722 TaskFlags = CGF.Builder.CreateOr(LHS: TaskFlags, RHS: CGF.Builder.getInt32(C: Flags));
3723 llvm::Value *SharedsSize = CGM.getSize(numChars: C.getTypeSizeInChars(T: SharedsTy));
3724 SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
3725 getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
3726 SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3727 V: TaskEntry, DestTy: KmpRoutineEntryPtrTy)};
3728 llvm::Value *NewTask;
3729 if (D.hasClausesOfKind<OMPNowaitClause>()) {
3730 // Check if we have any device clause associated with the directive.
3731 const Expr *Device = nullptr;
3732 if (auto *C = D.getSingleClause<OMPDeviceClause>())
3733 Device = C->getDevice();
3734 // Emit device ID if any otherwise use default value.
3735 llvm::Value *DeviceID;
3736 if (Device)
3737 DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
3738 DestTy: CGF.Int64Ty, /*isSigned=*/true);
3739 else
3740 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
3741 AllocArgs.push_back(Elt: DeviceID);
3742 NewTask = CGF.EmitRuntimeCall(
3743 callee: OMPBuilder.getOrCreateRuntimeFunction(
3744 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_target_task_alloc),
3745 args: AllocArgs);
3746 } else {
3747 NewTask =
3748 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
3749 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_alloc),
3750 args: AllocArgs);
3751 }
3752 // Emit detach clause initialization.
3753 // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
3754 // task_descriptor);
3755 if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
3756 const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
3757 LValue EvtLVal = CGF.EmitLValue(E: Evt);
3758
3759 // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
3760 // int gtid, kmp_task_t *task);
3761 llvm::Value *Loc = emitUpdateLocation(CGF, Loc: DC->getBeginLoc());
3762 llvm::Value *Tid = getThreadID(CGF, Loc: DC->getBeginLoc());
3763 Tid = CGF.Builder.CreateIntCast(V: Tid, DestTy: CGF.IntTy, /*isSigned=*/false);
3764 llvm::Value *EvtVal = CGF.EmitRuntimeCall(
3765 callee: OMPBuilder.getOrCreateRuntimeFunction(
3766 M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_allow_completion_event),
3767 args: {Loc, Tid, NewTask});
3768 EvtVal = CGF.EmitScalarConversion(Src: EvtVal, SrcTy: C.VoidPtrTy, DstTy: Evt->getType(),
3769 Loc: Evt->getExprLoc());
3770 CGF.EmitStoreOfScalar(value: EvtVal, lvalue: EvtLVal);
3771 }
3772 // Process affinity clauses.
3773 if (D.hasClausesOfKind<OMPAffinityClause>()) {
3774 // Process list of affinity data.
3775 ASTContext &C = CGM.getContext();
3776 Address AffinitiesArray = Address::invalid();
3777 // Calculate number of elements to form the array of affinity data.
3778 llvm::Value *NumOfElements = nullptr;
3779 unsigned NumAffinities = 0;
3780 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3781 if (const Expr *Modifier = C->getModifier()) {
3782 const auto *IE = cast<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts());
3783 for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
3784 llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
3785 Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
3786 NumOfElements =
3787 NumOfElements ? CGF.Builder.CreateNUWMul(LHS: NumOfElements, RHS: Sz) : Sz;
3788 }
3789 } else {
3790 NumAffinities += C->varlist_size();
3791 }
3792 }
3793 getKmpAffinityType(C&: CGM.getContext(), KmpTaskAffinityInfoTy);
3794 // Fields ids in kmp_task_affinity_info record.
3795 enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };
3796
3797 QualType KmpTaskAffinityInfoArrayTy;
3798 if (NumOfElements) {
3799 NumOfElements = CGF.Builder.CreateNUWAdd(
3800 LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: NumAffinities), RHS: NumOfElements);
3801 auto *OVE = new (C) OpaqueValueExpr(
3802 Loc,
3803 C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.getSizeType()), /*Signed=*/0),
3804 VK_PRValue);
3805 CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
3806 RValue::get(V: NumOfElements));
3807 KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
3808 EltTy: KmpTaskAffinityInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
3809 /*IndexTypeQuals=*/0);
3810 // Properly emit variable-sized array.
3811 auto *PD = ImplicitParamDecl::Create(C, T: KmpTaskAffinityInfoArrayTy,
3812 ParamKind: ImplicitParamKind::Other);
3813 CGF.EmitVarDecl(D: *PD);
3814 AffinitiesArray = CGF.GetAddrOfLocalVar(VD: PD);
3815 NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
3816 /*isSigned=*/false);
3817 } else {
3818 KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
3819 EltTy: KmpTaskAffinityInfoTy,
3820 ArySize: llvm::APInt(C.getTypeSize(T: C.getSizeType()), NumAffinities), SizeExpr: nullptr,
3821 ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
3822 AffinitiesArray =
3823 CGF.CreateMemTemp(T: KmpTaskAffinityInfoArrayTy, Name: ".affs.arr.addr");
3824 AffinitiesArray = CGF.Builder.CreateConstArrayGEP(Addr: AffinitiesArray, Index: 0);
3825 NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumAffinities,
3826 /*isSigned=*/IsSigned: false);
3827 }
3828
3829 const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
3830 // Fill array by elements without iterators.
3831 unsigned Pos = 0;
3832 bool HasIterator = false;
3833 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3834 if (C->getModifier()) {
3835 HasIterator = true;
3836 continue;
3837 }
3838 for (const Expr *E : C->varlist()) {
3839 llvm::Value *Addr;
3840 llvm::Value *Size;
3841 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
3842 LValue Base =
3843 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateConstGEP(Addr: AffinitiesArray, Index: Pos),
3844 T: KmpTaskAffinityInfoTy);
3845 // affs[i].base_addr = &<Affinities[i].second>;
3846 LValue BaseAddrLVal = CGF.EmitLValueForField(
3847 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
3848 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
3849 lvalue: BaseAddrLVal);
3850 // affs[i].len = sizeof(<Affinities[i].second>);
3851 LValue LenLVal = CGF.EmitLValueForField(
3852 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
3853 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
3854 ++Pos;
3855 }
3856 }
3857 LValue PosLVal;
3858 if (HasIterator) {
3859 PosLVal = CGF.MakeAddrLValue(
3860 Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "affs.counter.addr"),
3861 T: C.getSizeType());
3862 CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
3863 }
3864 // Process elements with iterators.
3865 for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
3866 const Expr *Modifier = C->getModifier();
3867 if (!Modifier)
3868 continue;
3869 OMPIteratorGeneratorScope IteratorScope(
3870 CGF, cast_or_null<OMPIteratorExpr>(Val: Modifier->IgnoreParenImpCasts()));
3871 for (const Expr *E : C->varlist()) {
3872 llvm::Value *Addr;
3873 llvm::Value *Size;
3874 std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
3875 llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
3876 LValue Base =
3877 CGF.MakeAddrLValue(Addr: CGF.Builder.CreateGEP(CGF, Addr: AffinitiesArray, Index: Idx),
3878 T: KmpTaskAffinityInfoTy);
3879 // affs[i].base_addr = &<Affinities[i].second>;
3880 LValue BaseAddrLVal = CGF.EmitLValueForField(
3881 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: BaseAddr));
3882 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy),
3883 lvalue: BaseAddrLVal);
3884 // affs[i].len = sizeof(<Affinities[i].second>);
3885 LValue LenLVal = CGF.EmitLValueForField(
3886 Base, Field: *std::next(x: KmpAffinityInfoRD->field_begin(), n: Len));
3887 CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
3888 Idx = CGF.Builder.CreateNUWAdd(
3889 LHS: Idx, RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
3890 CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
3891 }
3892 }
3893 // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
3894 // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
3895 // naffins, kmp_task_affinity_info_t *affin_list);
3896 llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
3897 llvm::Value *GTid = getThreadID(CGF, Loc);
3898 llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3899 V: AffinitiesArray.emitRawPointer(CGF), DestTy: CGM.VoidPtrTy);
3900 // FIXME: Emit the function and ignore its result for now unless the
3901 // runtime function is properly implemented.
3902 (void)CGF.EmitRuntimeCall(
3903 callee: OMPBuilder.getOrCreateRuntimeFunction(
3904 M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_reg_task_with_affinity),
3905 args: {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
3906 }
3907 llvm::Value *NewTaskNewTaskTTy =
3908 CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3909 V: NewTask, DestTy: KmpTaskTWithPrivatesPtrTy);
3910 LValue Base = CGF.MakeNaturalAlignRawAddrLValue(V: NewTaskNewTaskTTy,
3911 T: KmpTaskTWithPrivatesQTy);
3912 LValue TDBase =
3913 CGF.EmitLValueForField(Base, Field: *KmpTaskTWithPrivatesQTyRD->field_begin());
3914 // Fill the data in the resulting kmp_task_t record.
3915 // Copy shareds if there are any.
3916 Address KmpTaskSharedsPtr = Address::invalid();
3917 if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
3918 KmpTaskSharedsPtr = Address(
3919 CGF.EmitLoadOfScalar(
3920 lvalue: CGF.EmitLValueForField(
3921 Base: TDBase,
3922 Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTShareds)),
3923 Loc),
3924 CGF.Int8Ty, CGM.getNaturalTypeAlignment(T: SharedsTy));
3925 LValue Dest = CGF.MakeAddrLValue(Addr: KmpTaskSharedsPtr, T: SharedsTy);
3926 LValue Src = CGF.MakeAddrLValue(Addr: Shareds, T: SharedsTy);
3927 CGF.EmitAggregateCopy(Dest, Src, EltTy: SharedsTy, MayOverlap: AggValueSlot::DoesNotOverlap);
3928 }
3929 // Emit initial values for private copies (if any).
3930 TaskResultTy Result;
3931 if (!Privates.empty()) {
3932 emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase: Base, KmpTaskTWithPrivatesQTyRD,
3933 SharedsTy, SharedsPtrTy, Data, Privates,
3934 /*ForDup=*/false);
3935 if (isOpenMPTaskLoopDirective(DKind: D.getDirectiveKind()) &&
3936 (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
3937 Result.TaskDupFn = emitTaskDupFunction(
3938 CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
3939 KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
3940 /*WithLastIter=*/!Data.LastprivateVars.empty());
3941 }
3942 }
3943 // Fields of union "kmp_cmplrdata_t" for destructors and priority.
3944 enum { Priority = 0, Destructors = 1 };
3945 // Provide pointer to function with destructors for privates.
3946 auto FI = std::next(x: KmpTaskTQTyRD->field_begin(), n: Data1);
3947 const RecordDecl *KmpCmplrdataUD =
3948 (*FI)->getType()->getAsUnionType()->getDecl();
3949 if (NeedsCleanup) {
3950 llvm::Value *DestructorFn = emitDestructorsFunction(
3951 CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
3952 KmpTaskTWithPrivatesQTy);
3953 LValue Data1LV = CGF.EmitLValueForField(Base: TDBase, Field: *FI);
3954 LValue DestructorsLV = CGF.EmitLValueForField(
3955 Base: Data1LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Destructors));
3956 CGF.EmitStoreOfScalar(value: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
3957 V: DestructorFn, DestTy: KmpRoutineEntryPtrTy),
3958 lvalue: DestructorsLV);
3959 }
3960 // Set priority.
3961 if (Data.Priority.getInt()) {
3962 LValue Data2LV = CGF.EmitLValueForField(
3963 Base: TDBase, Field: *std::next(x: KmpTaskTQTyRD->field_begin(), n: Data2));
3964 LValue PriorityLV = CGF.EmitLValueForField(
3965 Base: Data2LV, Field: *std::next(x: KmpCmplrdataUD->field_begin(), n: Priority));
3966 CGF.EmitStoreOfScalar(value: Data.Priority.getPointer(), lvalue: PriorityLV);
3967 }
3968 Result.NewTask = NewTask;
3969 Result.TaskEntry = TaskEntry;
3970 Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
3971 Result.TDBase = TDBase;
3972 Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
3973 return Result;
3974}
3975
3976/// Translates internal dependency kind into the runtime kind.
3977static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
3978 RTLDependenceKindTy DepKind;
3979 switch (K) {
3980 case OMPC_DEPEND_in:
3981 DepKind = RTLDependenceKindTy::DepIn;
3982 break;
3983 // Out and InOut dependencies must use the same code.
3984 case OMPC_DEPEND_out:
3985 case OMPC_DEPEND_inout:
3986 DepKind = RTLDependenceKindTy::DepInOut;
3987 break;
3988 case OMPC_DEPEND_mutexinoutset:
3989 DepKind = RTLDependenceKindTy::DepMutexInOutSet;
3990 break;
3991 case OMPC_DEPEND_inoutset:
3992 DepKind = RTLDependenceKindTy::DepInOutSet;
3993 break;
3994 case OMPC_DEPEND_outallmemory:
3995 DepKind = RTLDependenceKindTy::DepOmpAllMem;
3996 break;
3997 case OMPC_DEPEND_source:
3998 case OMPC_DEPEND_sink:
3999 case OMPC_DEPEND_depobj:
4000 case OMPC_DEPEND_inoutallmemory:
4001 case OMPC_DEPEND_unknown:
4002 llvm_unreachable("Unknown task dependence type");
4003 }
4004 return DepKind;
4005}
4006
4007/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
4008static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
4009 QualType &FlagsTy) {
4010 FlagsTy = C.getIntTypeForBitwidth(DestWidth: C.getTypeSize(T: C.BoolTy), /*Signed=*/false);
4011 if (KmpDependInfoTy.isNull()) {
4012 RecordDecl *KmpDependInfoRD = C.buildImplicitRecord(Name: "kmp_depend_info");
4013 KmpDependInfoRD->startDefinition();
4014 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getIntPtrType());
4015 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: C.getSizeType());
4016 addFieldToRecordDecl(C, DC: KmpDependInfoRD, FieldTy: FlagsTy);
4017 KmpDependInfoRD->completeDefinition();
4018 KmpDependInfoTy = C.getRecordType(Decl: KmpDependInfoRD);
4019 }
4020}
4021
/// Given an lvalue for a depobj handle, returns the number of dependency
/// records it holds together with an lvalue addressing the first
/// kmp_depend_info element. The count lives in the base_addr field of the
/// element *before* the stored pointer (the header slot reserved by
/// emitDepobjDependClause).
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  // Ensure kmp_depend_info (and the flags type) are built.
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(Val: KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
  // The depobj variable stores a pointer to the first dependency record;
  // load it as a kmp_depend_info*.
  LValue Base = CGF.EmitLoadOfPointerLValue(
      Ptr: DepobjLVal.getAddress().withElementType(
          ElemTy: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy)),
      PtrTy: KmpDependInfoPtrTy->castAs<PointerType>());
  // Step back one element to the header slot holding the count.
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Addr: Base.getAddress(),
      Index: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      Addr: DepObjAddr, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(), TBAAInfo: Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base: NumDepsBase,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(lvalue: BaseAddrLVal, Loc);
  return std::make_pair(x&: NumDeps, y&: Base);
}
4048
/// Emits the kmp_depend_info records for one 'depend' clause into
/// \p DependenciesArray starting at the position designated by \p Pos.
/// \p Pos is either a compile-time index (unsigned *) for clauses without an
/// iterator modifier, or an lvalue (LValue *) holding a runtime counter that
/// is advanced for every record emitted.
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(Val: KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);

  // Open the iterator scope (if the clause has an iterator modifier) so the
  // dependency expressions below can reference the iterator variables.
  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(args&: Addr, args&: Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(V: Addr, DestTy: CGF.IntPtrTy);
    } else {
      // 'omp_all_memory' is encoded as a record with null address and length.
      Addr = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
      Size = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0);
    }
    LValue Base;
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      // Static position: index the array with a constant GEP.
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: *P), T: KmpDependInfoTy);
    } else {
      // Dynamic position: load the current counter value.
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          Addr: CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Idx), T: KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(value: Addr, lvalue: BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                          n: static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(value: Size, lvalue: LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(K: Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        Field: *std::next(x: KmpDependInfoRD->field_begin(),
                     n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
        lvalue: FlagsLVal);
    // Advance the position (constant increment or runtime counter update).
    if (unsigned *P = dyn_cast<unsigned *>(Val&: Pos)) {
      ++(*P);
    } else {
      LValue &PosLVal = *cast<LValue *>(Val&: Pos);
      llvm::Value *Idx = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(LHS: Idx,
                                     RHS: llvm::ConstantInt::get(Ty: Idx->getType(), V: 1));
      CGF.EmitStoreOfScalar(value: Idx, lvalue: PosLVal);
    }
  }
}
4119
/// For each depobj expression in \p Data, loads the number of dependency
/// records stored in that depobj and returns the counts in source order.
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    // Evaluate depobj expressions inside the iterator scope (if any) so the
    // iterator variables they reference are defined.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());
      // Stash the count in a temporary so it can be read back after the
      // iterator scope closes.
      // NOTE(review): the temporary is stored 0 and immediately re-loaded, so
      // PrevVal is always 0 and Add == NumDeps; the store/load pair appears
      // redundant — confirm before simplifying, as it changes emitted IR.
      LValue NumLVal = CGF.MakeAddrLValue(
          Addr: CGF.CreateMemTemp(T: C.getUIntPtrType(), Name: "depobj.size.addr"),
          T: C.getUIntPtrType());
      CGF.Builder.CreateStore(Val: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0),
                              Addr: NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(lvalue: NumLVal, Loc: E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: PrevVal, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: NumLVal);
      SizeLVals.push_back(Elt: NumLVal);
    }
  }
  // Read the accumulated counts back outside the iterator scope.
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(lvalue: SizeLVals[I], Loc: Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Elt: Size);
  }
  return Sizes;
}
4157
/// Copies the kmp_depend_info records already stored in each depobj of
/// \p Data into \p DependenciesArray at the runtime element index held by
/// \p PosLVal, advancing the index by the number of records copied.
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  // Size in bytes of a single kmp_depend_info record.
  llvm::Value *ElSize = CGF.getTypeSize(Ty: KmpDependInfoTy);
  {
    // Iterator scope covers depobj expressions with an iterator modifier.
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Val: Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E: E->IgnoreParenImpCasts());
      std::tie(args&: NumDeps, args&: Base) =
          getDepobjElements(CGF, DepobjLVal, Loc: E->getExprLoc());

      // memcopy dependency data: NumDeps records of ElSize bytes each.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          LHS: ElSize,
          RHS: CGF.Builder.CreateIntCast(V: NumDeps, DestTy: CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(lvalue: PosLVal, Loc: E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, Addr: DependenciesArray, Index: Pos);
      CGF.Builder.CreateMemCpy(Dest: DepAddr, Src: Base.getAddress(), Size);

      // Increase pos by the number of copied records (an element index, not a
      // byte offset):
      // pos += numDeps;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(LHS: Pos, RHS: NumDeps);
      CGF.EmitStoreOfScalar(value: Add, lvalue: PosLVal);
    }
  }
}
4193
/// Emits the combined dependency array for all 'depend' clauses of a task
/// directive. Returns the total number of dependencies (as an i32) and the
/// void*-typed address of the array, or {nullptr, invalid} when every clause
/// has an empty dependency list.
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Range&: Dependencies, P: [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(x: nullptr, y: Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  // Statically countable dependencies: regular (non-depobj) clauses without an
  // iterator modifier. depobj and iterator clauses contribute runtime counts
  // below.
  unsigned NumDependencies = std::accumulate(
      first: Dependencies.begin(), last: Dependencies.end(), init: 0,
      binary_op: [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      // Each depobj contributes the number of records it already stores.
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, Data: D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(Val: D.IteratorExpr)) {
      // Trip count = product of all iterator upper bounds.
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(LHS: Sz, RHS: ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          LHS: ClauseIteratorSpace,
          RHS: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    // Total size is only known at run time: emit a VLA of kmp_depend_info.
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.IntPtrTy, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfDepobjElements, RHS: NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(LHS: NumOfRegularWithIterators, RHS: NumOfElements);
    }
    // Wrap the computed count in an OpaqueValueExpr so it can serve as the
    // VLA size expression.
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(V: NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(EltTy: KmpDependInfoTy, NumElts: OVE, ASM: ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0);
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, T: KmpDependInfoArrayTy,
                                         ParamKind: ImplicitParamKind::Other);
    CGF.EmitVarDecl(D: *PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(VD: PD);
    NumOfElements = CGF.Builder.CreateIntCast(V: NumOfElements, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    // Fully static count: a constant-sized local array suffices.
    KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies), SizeExpr: nullptr,
        ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(T: KmpDependInfoArrayTy, Name: ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(Addr: DependenciesArray, Index: 0);
    NumOfElements = llvm::ConstantInt::get(Ty: CGM.Int32Ty, V: NumDependencies,
                                           /*isSigned=*/IsSigned: false);
  }
  // Fill the array: first, regular deps without iterators (static positions).
  unsigned Pos = 0;
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &Pos, Data: Dep, DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "dep.counter.addr"), T: C.getSizeType());
  CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Pos), lvalue: PosLVal);
  for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
    if (Dep.DepKind == OMPC_DEPEND_depobj || !Dep.IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, Pos: &PosLVal, Data: Dep, DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (const OMPTaskDataTy::DependData &Dep : Dependencies) {
      if (Dep.DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Data: Dep, DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: DependenciesArray, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty);
  return std::make_pair(x&: NumOfElements, y&: DependenciesArray);
}
4317
/// Emits the dependency array for a standalone 'depobj' construct and returns
/// its address. The array is heap-allocated via __kmpc_alloc with one extra
/// leading element whose base_addr field records the number of dependencies
/// (needed by later 'update'/'destroy' clauses — see getDepobjElements /
/// emitDestroyClause). The returned address points past that header, at the
/// first real dependency record.
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(Val: KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(T: KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Val: Dependencies.IteratorExpr)) {
    // With an iterator modifier the element count is the product of all
    // iterator upper bounds, computed at run time.
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(E: IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(V: Sz, DestTy: CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(LHS: NumDepsVal, RHS: Sz);
    }
    // +1 for the leading header slot holding the element count.
    Size = CGF.Builder.CreateNUWAdd(LHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1),
                                    RHS: NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(T: KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(numChars: SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(V: NumDepsVal, DestTy: CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    // Constant element count: size the allocation (+1 header slot) statically.
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        EltTy: KmpDependInfoTy, ArySize: llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(T: KmpDependInfoArrayTy);
    Size = CGM.getSize(numChars: Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_alloc),
                          args: Args, name: ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(T: KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: Addr, DestTy: CGF.Builder.getPtrTy(AddrSpace: 0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(Addr: DependenciesArray, T: KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      Field: *std::next(x: KmpDependInfoRD->field_begin(),
                   n: static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(value: NumDepsVal, lvalue: BaseAddrLVal);
  // Fill in the records starting at index 1 (index 0 is the header); use a
  // runtime counter when an iterator makes the record count dynamic.
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        Addr: CGF.CreateMemTemp(T: C.getSizeType(), Name: "iterator.counter.addr"),
        T: C.getSizeType());
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: Idx), lvalue: PosLVal,
                          /*IsInit=*/isInit: true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Data: Dependencies, DependenciesArray);
  // Return a void* past the header element, i.e. at the first dependency.
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: CGF.Builder.CreateConstGEP(Addr: DependenciesArray, Index: 1), Ty: CGF.VoidPtrTy,
      ElementTy: CGF.Int8Ty);
  return DependenciesArray;
}
4404
4405void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
4406 SourceLocation Loc) {
4407 ASTContext &C = CGM.getContext();
4408 QualType FlagsTy;
4409 getDependTypes(C, KmpDependInfoTy, FlagsTy);
4410 LValue Base = CGF.EmitLoadOfPointerLValue(Ptr: DepobjLVal.getAddress(),
4411 PtrTy: C.VoidPtrTy.castAs<PointerType>());
4412 QualType KmpDependInfoPtrTy = C.getPointerType(T: KmpDependInfoTy);
4413 Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
4414 Addr: Base.getAddress(), Ty: CGF.ConvertTypeForMem(T: KmpDependInfoPtrTy),
4415 ElementTy: CGF.ConvertTypeForMem(T: KmpDependInfoTy));
4416 llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
4417 Ty: Addr.getElementType(), Ptr: Addr.emitRawPointer(CGF),
4418 IdxList: llvm::ConstantInt::get(Ty: CGF.IntPtrTy, V: -1, /*isSigned=*/IsSigned: true));
4419 DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: DepObjAddr,
4420 DestTy: CGF.VoidPtrTy);
4421 llvm::Value *ThreadID = getThreadID(CGF, Loc);
4422 // Use default allocator.
4423 llvm::Value *Allocator = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
4424 llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};
4425
4426 // _kmpc_free(gtid, addr, nullptr);
4427 (void)CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
4428 M&: CGM.getModule(), FnID: OMPRTL___kmpc_free),
4429 args: Args);
4430}
4431
/// Emits code for the 'update' clause of an 'omp depobj' construct: rewrites
/// the 'flags' field of every kmp_depend_info element stored in the depobj to
/// the dependence kind given by \p NewDepKind.
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(Val: KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(T: FlagsTy);
  // Fetch the number of dependence elements and an lvalue for the first
  // element out of the depobj.
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(args&: NumDeps, args&: Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Ty: Begin.getElementType(),
                                           Ptr: Begin.emitRawPointer(CGF), IdxList: NumDeps);
  // The basic structure here is a while-do loop.
  // NOTE(review): the loop body is entered unconditionally, which assumes the
  // depobj holds at least one dependence element — confirm against callers.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);
  // PHI over the current element pointer: starts at Begin, advanced by one
  // kmp_depend_info element on each back edge.
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Ty: Begin.getType(), NumReservedValues: 2, Name: "omp.elementPast");
  ElementPHI->addIncoming(V: Begin.emitRawPointer(CGF), BB: EntryBB);
  Begin = Begin.withPointer(NewPointer: ElementPHI, IsKnownNonNull: KnownNonNull);
  Base = CGF.MakeAddrLValue(Addr: Begin, T: KmpDependInfoTy, BaseInfo: Base.getBaseInfo(),
                            TBAAInfo: Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(K: NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, Field: *std::next(x: KmpDependInfoRD->field_begin(),
                       n: static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::get(Ty: LLVMFlagsTy, V: static_cast<unsigned int>(DepKind)),
      lvalue: FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Addr: Begin, /*Index=*/1, Name: "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(V: ElementNext, BB: CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: ElementNext, RHS: End, Name: "omp.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);
  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4480
/// Emits code for a task construct: creates and initializes the task object
/// via emitTaskInit, then either hands it to the runtime for deferred
/// execution (__kmpc_omp_task / __kmpc_omp_task_with_deps) or — when the 'if'
/// clause evaluates to false — executes it immediately as an undeferred task
/// bracketed by __kmpc_omp_task_begin_if0/__kmpc_omp_task_complete_if0.
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(args&: NumOfElements, args&: DependenciesArray) =
      emitDependClause(CGF, Dependencies: Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  // DepTaskArgs is only populated (and only read in ThenCodeGen) when the
  // dependence list is non-empty.
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(C: 0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  }
  // 'Then' path ('if' clause true or absent): schedule the task for deferred
  // execution via __kmpc_omp_task[_with_deps].
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    // For untied tasks, reset the part_id counter to 0 before scheduling.
    if (!Data.Tied) {
      auto PartIdFI = std::next(x: KmpTaskTQTyRD->field_begin(), n: KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(Base: TDBase, Field: *PartIdFI);
      CGF.EmitStoreOfScalar(value: CGF.Builder.getInt32(C: 0), lvalue: PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task_with_deps),
          args: DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_omp_task),
                          args: TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  // Arguments for __kmpc_omp_taskwait_deps_51, used on the undeferred path to
  // wait for the task's dependences before running it inline.
  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  // 'Else' path ('if' clause false): wait on dependences if any, then run the
  // task body immediately, bracketed by the begin_if0/complete_if0 libcalls.
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, OutlinedFn: TaskEntry,
                                                          Args: OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  // With an 'if' clause, branch between the deferred and undeferred paths at
  // runtime; otherwise the task is always scheduled for deferred execution.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: ThenCodeGen, ElseGen: ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
4600
/// Emits code for a taskloop construct: creates and initializes the task
/// object, fills in the loop-bound fields (lb/ub/stride) of the task
/// descriptor, and invokes __kmpc_taskloop — or __kmpc_taskloop_5 when a
/// grainsize/num_tasks modifier is present — so the runtime can split the
/// iteration space into tasks.
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  // if_val passed to the runtime; defaults to 1 when there is no 'if' clause.
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(V: CGF.EvaluateExprAsBool(E: IfCond), DestTy: CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(Ty: CGF.IntTy, /*V=*/1);
  }

  // Initialize the lower-bound, upper-bound and stride fields of the task
  // descriptor from the directive's loop-bound variables.
  LValue LBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: LBVar->getInit(), Location: LBLVal.getAddress(), Quals: LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: UBVar->getInit(), Location: UBLVal.getAddress(), Quals: UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(E: StVar->getInit(), Location: StLVal.getAddress(), Quals: StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Base: Result.TDBase,
      Field: *std::next(x: Result.KmpTaskTQTyRD->field_begin(), n: KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(value: Data.Reductions, lvalue: RedLVal);
  } else {
    CGF.EmitNullInitialization(DestPtr: RedLVal.getAddress(),
                               Ty: CGF.getContext().VoidPtrTy);
  }
  // 'sched' argument encoding for the runtime call.
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::SmallVector<llvm::Value *, 12> TaskArgs{
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(lvalue: StLVal, Loc),
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          Ty: CGF.IntTy, V: Data.Schedule.getPointer()
                             ? Data.Schedule.getInt() ? NumTasks : Grainsize
                             : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(V: Data.Schedule.getPointer(), DestTy: CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/0)};
  // The 'modifier' argument only exists in the __kmpc_taskloop_5 entry point.
  if (Data.HasModifier)
    TaskArgs.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: 1));

  TaskArgs.push_back(Elt: Result.TaskDupFn
                         ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                               V: Result.TaskDupFn, DestTy: CGF.VoidPtrTy)
                         : llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy));
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: Data.HasModifier
                                                  ? OMPRTL___kmpc_taskloop_5
                                                  : OMPRTL___kmpc_taskloop),
                      args: TaskArgs);
}
4689
4690/// Emit reduction operation for each element of array (required for
4691/// array sections) LHS op = RHS.
4692/// \param Type Type of array.
4693/// \param LHSVar Variable on the left side of the reduction operation
4694/// (references element of array in original variable).
4695/// \param RHSVar Variable on the right side of the reduction operation
4696/// (references element of array in original variable).
4697/// \param RedOpGen Generator of reduction operation with use of LHSVar and
4698/// RHSVar.
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
/// \param XExpr, EExpr, UpExpr Optional expressions forwarded verbatim to
/// \p RedOpGen (used by atomic-style reduction generators).
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(VD: LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(VD: RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(Ty: LHSAddr.getElementType(), Ptr: LHSBegin, IdxList: NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock(name: "omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHS: LHSBegin, RHS: LHSEnd, Name: "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BB: BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(T: ElementTy);

  // PHIs track the current source/destination element pointers; each back
  // edge advances them by one element.
  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      Ty: RHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(V: RHSBegin, BB: EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      Ty: LHSBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(V: LHSBegin, BB: EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize));

  // Emit copy.
  // Temporarily remap LHSVar/RHSVar to the current element addresses so the
  // generated reduction operation applies element-wise.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LocalVD: LHSVar, Addr: LHSElementCurrent);
  Scope.addPrivate(LocalVD: RHSVar, Addr: RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: LHSAddr.getElementType(), Ptr: LHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      Ty: RHSAddr.getElementType(), Ptr: RHSElementPHI, /*Idx0=*/1,
      Name: "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHS: LHSElementNext, RHS: LHSEnd, Name: "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB);
  LHSElementPHI->addIncoming(V: LHSElementNext, BB: CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(V: RHSElementNext, BB: CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
}
4772
4773/// Emit reduction combiner. If the combiner is a simple expression emit it as
4774/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
4775/// UDR combiner function.
4776static void emitReductionCombiner(CodeGenFunction &CGF,
4777 const Expr *ReductionOp) {
4778 if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp))
4779 if (const auto *OVE = dyn_cast<OpaqueValueExpr>(Val: CE->getCallee()))
4780 if (const auto *DRE =
4781 dyn_cast<DeclRefExpr>(Val: OVE->getSourceExpr()->IgnoreImpCasts()))
4782 if (const auto *DRD =
4783 dyn_cast<OMPDeclareReductionDecl>(Val: DRE->getDecl())) {
4784 std::pair<llvm::Function *, llvm::Function *> Reduction =
4785 CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(D: DRD);
4786 RValue Func = RValue::get(V: Reduction.first);
4787 CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
4788 CGF.EmitIgnoredExpr(E: ReductionOp);
4789 return;
4790 }
4791 CGF.EmitIgnoredExpr(E: ReductionOp);
4792}
4793
/// Emits the outlined reduction function
///   void <reduction_func>(void *LHSArg, void *RHSArg)
/// where both arguments point to arrays of void* slots referring to the
/// original/private reduction copies. Each pair of slots is combined with the
/// corresponding reduction operation from \p ReductionOps.
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(Elt: &LHSArg);
  Args.push_back(Elt: &RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  std::string Name = getReductionFuncName(Name: ReducerName);
  auto *Fn = llvm::Function::Create(Ty: CGM.getTypes().GetFunctionType(Info: CGFI),
                                    Linkage: llvm::GlobalValue::InternalLinkage, N: Name,
                                    M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo: CGFI, Args, Loc, StartLoc: Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &LHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  V: CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar(VD: &RHSArg)),
                  DestTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
              ArgsElemType, CGF.getPointerAlign());

  // ...
  // *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  // ...
  // Map each LHS/RHS variable to its slot in the argument arrays. Idx tracks
  // the slot index separately from I because variably-modified types consume
  // an extra slot holding the array size (see below).
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: RHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: RHS, Index: Idx, Var: RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSExprs[I])->getDecl());
    Scope.addPrivate(LocalVD: LHSVar, Addr: emitAddrOfVarFromArray(CGF, Array: LHS, Index: Idx, Var: LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: LHS, Index: Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(T: PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(Val: VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(V: CGF.Builder.CreatePtrToInt(V: Ptr, DestTy: CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(Ty: PrivTy);
    }
  }
  Scope.Privatize();
  // Emit each combiner; array-typed privates reduce element-by-element.
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
          RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, ReductionOp: E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, ReductionOp: E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
4883
4884void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
4885 const Expr *ReductionOp,
4886 const Expr *PrivateRef,
4887 const DeclRefExpr *LHS,
4888 const DeclRefExpr *RHS) {
4889 if (PrivateRef->getType()->isArrayType()) {
4890 // Emit reduction for array section.
4891 const auto *LHSVar = cast<VarDecl>(Val: LHS->getDecl());
4892 const auto *RHSVar = cast<VarDecl>(Val: RHS->getDecl());
4893 EmitOMPAggregateReduction(
4894 CGF, Type: PrivateRef->getType(), LHSVar, RHSVar,
4895 RedOpGen: [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
4896 emitReductionCombiner(CGF, ReductionOp);
4897 });
4898 } else {
4899 // Emit reduction for array subscript or single variable.
4900 emitReductionCombiner(CGF, ReductionOp);
4901 }
4902}
4903
4904static std::string generateUniqueName(CodeGenModule &CGM,
4905 llvm::StringRef Prefix, const Expr *Ref);
4906
/// Emits a reduction over a privatized variable through a compiler-generated
/// shared global: thread 0 initializes the shared accumulator, every thread
/// combines its private value into it under a critical section, and the final
/// value is broadcast back to each thread (see the step-by-step comment
/// below). Barriers separate each phase.
void CGOpenMPRuntime::emitPrivateReduction(
    CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
    const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {

  // Create a shared global variable (__shared_reduction_var) to accumulate the
  // final result.
  //
  // Call __kmpc_barrier to synchronize threads before initialization.
  //
  // The master thread (thread_id == 0) initializes __shared_reduction_var
  // with the identity value or initializer.
  //
  // Call __kmpc_barrier to synchronize before combining.
  // For each i:
  // - Thread enters critical section.
  // - Reads its private value from LHSExprs[i].
  // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
  //   Privates[i]).
  // - Exits critical section.
  //
  // Call __kmpc_barrier after combining.
  //
  // Each thread copies __shared_reduction_var[i] back to RHSExprs[i].
  //
  // Final __kmpc_barrier to synchronize after broadcasting
  QualType PrivateType = Privates->getType();
  llvm::Type *LLVMType = CGF.ConvertTypeForMem(T: PrivateType);

  // NOTE(review): getReductionInit presumably returns the declare-reduction
  // decl when ReductionOps refers to a UDR, else null — confirm at definition.
  const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOp: ReductionOps);
  std::string ReductionVarNameStr;
  if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates->IgnoreParenCasts()))
    ReductionVarNameStr =
        generateUniqueName(CGM, Prefix: DRE->getDecl()->getNameAsString(), Ref: Privates);
  else
    ReductionVarNameStr = "unnamed_priv_var";

  // Create an internal shared variable
  // NOTE(review): "internal_pivate_" looks like a typo for "internal_private_";
  // it is part of the mangled name of the emitted global, so changing it would
  // alter generated symbol names — left as-is.
  std::string SharedName =
      CGM.getOpenMPRuntime().getName(Parts: {"internal_pivate_", ReductionVarNameStr});
  llvm::GlobalVariable *SharedVar = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLVMType, Name: ".omp.reduction." + SharedName);

  SharedVar->setAlignment(
      llvm::MaybeAlign(CGF.getContext().getTypeAlign(T: PrivateType) / 8));

  Address SharedResult =
      CGF.MakeNaturalAlignRawAddrLValue(V: SharedVar, T: PrivateType).getAddress();

  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};

  llvm::BasicBlock *InitBB = CGF.createBasicBlock(name: "init");
  llvm::BasicBlock *InitEndBB = CGF.createBasicBlock(name: "init.end");

  // Only thread 0 runs the initialization block.
  llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
      LHS: ThreadId, RHS: llvm::ConstantInt::get(Ty: ThreadId->getType(), V: 0));
  CGF.Builder.CreateCondBr(Cond: IsWorker, True: InitBB, False: InitEndBB);

  CGF.EmitBlock(BB: InitBB);

  // Initializes the shared accumulator: UDR initializer if present, otherwise
  // the private variable's own initializer, otherwise null/zero init.
  auto EmitSharedInit = [&]() {
    if (UDR) { // Check if it's a User-Defined Reduction
      if (const Expr *UDRInitExpr = UDR->getInitializer()) {
        std::pair<llvm::Function *, llvm::Function *> FnPair =
            getUserDefinedReduction(D: UDR);
        llvm::Function *InitializerFn = FnPair.second;
        if (InitializerFn) {
          if (const auto *CE =
                  dyn_cast<CallExpr>(Val: UDRInitExpr->IgnoreParenImpCasts())) {
            // Bind the UDR's 'omp_priv' output variable to the shared
            // accumulator, then emit the initializer call.
            const auto *OutDRE = cast<DeclRefExpr>(
                Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                    ->getSubExpr());
            const VarDecl *OutVD = cast<VarDecl>(Val: OutDRE->getDecl());

            CodeGenFunction::OMPPrivateScope LocalScope(CGF);
            LocalScope.addPrivate(LocalVD: OutVD, Addr: SharedResult);

            (void)LocalScope.Privatize();
            if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
                    Val: CE->getCallee()->IgnoreParenImpCasts())) {
              CodeGenFunction::OpaqueValueMapping OpaqueMap(
                  CGF, OVE, RValue::get(V: InitializerFn));
              CGF.EmitIgnoredExpr(E: CE);
            } else {
              CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                   Quals: PrivateType.getQualifiers(),
                                   /*IsInitializer=*/true);
            }
          } else {
            CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                                 Quals: PrivateType.getQualifiers(),
                                 /*IsInitializer=*/true);
          }
        } else {
          CGF.EmitAnyExprToMem(E: UDRInitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(),
                               /*IsInitializer=*/true);
        }
      } else {
        // EmitNullInitialization handles default construction for C++ classes
        // and zeroing for scalars, which is a reasonable default.
        CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
      }
      return; // UDR initialization handled
    }
    if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: Privates)) {
      if (const auto *VD = dyn_cast<VarDecl>(Val: DRE->getDecl())) {
        if (const Expr *InitExpr = VD->getInit()) {
          CGF.EmitAnyExprToMem(E: InitExpr, Location: SharedResult,
                               Quals: PrivateType.getQualifiers(), IsInitializer: true);
          return;
        }
      }
    }
    CGF.EmitNullInitialization(DestPtr: SharedResult, Ty: PrivateType);
  };
  EmitSharedInit();
  CGF.Builder.CreateBr(Dest: InitEndBB);
  CGF.EmitBlock(BB: InitEndBB);

  // Barrier: all threads wait until the accumulator is initialized.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  const Expr *ReductionOp = ReductionOps;
  const OMPDeclareReductionDecl *CurrentUDR = getReductionInit(ReductionOp);
  LValue SharedLV = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  LValue LHSLV = CGF.EmitLValue(E: Privates);

  // Wraps a combiner generator in a named critical region so only one thread
  // updates the shared accumulator at a time.
  auto EmitCriticalReduction = [&](auto ReductionGen) {
    std::string CriticalName = getName(Parts: {"reduction_critical"});
    emitCriticalRegion(CGF, CriticalName, CriticalOpGen: ReductionGen, Loc);
  };

  if (CurrentUDR) {
    // Handle user-defined reduction.
    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      std::pair<llvm::Function *, llvm::Function *> FnPair =
          getUserDefinedReduction(D: CurrentUDR);
      if (FnPair.first) {
        // Map the UDR's 'omp_out'/'omp_in' variables to the shared accumulator
        // and this thread's private value, then emit the combiner call.
        if (const auto *CE = dyn_cast<CallExpr>(Val: ReductionOp)) {
          const auto *OutDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 0)->IgnoreParenImpCasts())
                  ->getSubExpr());
          const auto *InDRE = cast<DeclRefExpr>(
              Val: cast<UnaryOperator>(Val: CE->getArg(Arg: 1)->IgnoreParenImpCasts())
                  ->getSubExpr());
          CodeGenFunction::OMPPrivateScope LocalScope(CGF);
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: OutDRE->getDecl()),
                                Addr: SharedLV.getAddress());
          LocalScope.addPrivate(LocalVD: cast<VarDecl>(Val: InDRE->getDecl()),
                                Addr: LHSLV.getAddress());
          (void)LocalScope.Privatize();
          emitReductionCombiner(CGF, ReductionOp);
        }
      }
    };
    EmitCriticalReduction(ReductionGen);
  } else {
    // Handle built-in reduction operations.
    // Debug-only sanity check: the reduction op must be an assignment
    // ('omp_out = ...' or 'operator=' call); only the check is debug-gated,
    // the emission below runs in all builds.
#ifndef NDEBUG
    const Expr *ReductionClauseExpr = ReductionOp->IgnoreParenCasts();
    if (const auto *Cleanup = dyn_cast<ExprWithCleanups>(ReductionClauseExpr))
      ReductionClauseExpr = Cleanup->getSubExpr()->IgnoreParenCasts();

    const Expr *AssignRHS = nullptr;
    if (const auto *BinOp = dyn_cast<BinaryOperator>(ReductionClauseExpr)) {
      if (BinOp->getOpcode() == BO_Assign)
        AssignRHS = BinOp->getRHS();
    } else if (const auto *OpCall =
                   dyn_cast<CXXOperatorCallExpr>(ReductionClauseExpr)) {
      if (OpCall->getOperator() == OO_Equal)
        AssignRHS = OpCall->getArg(1);
    }

    assert(AssignRHS &&
           "Private Variable Reduction : Invalid ReductionOp expression");
#endif

    auto ReductionGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      const auto *OmpOutDRE =
          dyn_cast<DeclRefExpr>(Val: LHSExprs->IgnoreParenImpCasts());
      const auto *OmpInDRE =
          dyn_cast<DeclRefExpr>(Val: RHSExprs->IgnoreParenImpCasts());
      assert(
          OmpOutDRE && OmpInDRE &&
          "Private Variable Reduction : LHSExpr/RHSExpr must be DeclRefExprs");
      const VarDecl *OmpOutVD = cast<VarDecl>(Val: OmpOutDRE->getDecl());
      const VarDecl *OmpInVD = cast<VarDecl>(Val: OmpInDRE->getDecl());
      CodeGenFunction::OMPPrivateScope LocalScope(CGF);
      LocalScope.addPrivate(LocalVD: OmpOutVD, Addr: SharedLV.getAddress());
      LocalScope.addPrivate(LocalVD: OmpInVD, Addr: LHSLV.getAddress());
      (void)LocalScope.Privatize();
      // Emit the actual reduction operation
      CGF.EmitIgnoredExpr(E: ReductionOp);
    };
    EmitCriticalReduction(ReductionGen);
  }

  // Barrier: all threads wait until every contribution has been combined.
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Broadcast final result
  bool IsAggregate = PrivateType->isAggregateType();
  LValue SharedLV1 = CGF.MakeAddrLValue(Addr: SharedResult, T: PrivateType);
  llvm::Value *FinalResultVal = nullptr;
  Address FinalResultAddr = Address::invalid();

  if (IsAggregate)
    FinalResultAddr = SharedResult;
  else
    FinalResultVal = CGF.EmitLoadOfScalar(lvalue: SharedLV1, Loc);

  LValue TargetLHSLV = CGF.EmitLValue(E: RHSExprs);
  if (IsAggregate) {
    CGF.EmitAggregateCopy(Dest: TargetLHSLV,
                          Src: CGF.MakeAddrLValue(Addr: FinalResultAddr, T: PrivateType),
                          EltTy: PrivateType, MayOverlap: AggValueSlot::DoesNotOverlap, isVolatile: false);
  } else {
    CGF.EmitStoreOfScalar(value: FinalResultVal, lvalue: TargetLHSLV);
  }
  // Final synchronization barrier
  CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                          M&: CGM.getModule(), FnID: OMPRTL___kmpc_barrier),
                      args: BarrierArgs);

  // Combiner with original list item
  auto OriginalListCombiner = [&](CodeGenFunction &CGF,
                                  PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitSingleReductionCombiner(CGF, ReductionOp: ReductionOps, PrivateRef: Privates,
                                LHS: cast<DeclRefExpr>(Val: LHSExprs),
                                RHS: cast<DeclRefExpr>(Val: RHSExprs));
  };
  EmitCriticalReduction(OriginalListCombiner);
}
5147
/// Emits the final combination for an OpenMP 'reduction' clause: each
/// thread-private copy (RHSExprs) is folded into the original list item
/// (LHSExprs) with its ReductionOp. Shared reduction variables go through the
/// __kmpc_reduce{_nowait} runtime protocol; entries flagged in
/// Options.IsPrivateVarReduction are handled separately at the end via
/// emitPrivateReduction (critical-section based).
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> OrgPrivates,
                                    ArrayRef<const Expr *> OrgLHSExprs,
                                    ArrayRef<const Expr *> OrgRHSExprs,
                                    ArrayRef<const Expr *> OrgReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    // No runtime protocol needed: just apply each combiner in place.
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = OrgPrivates.begin();
    const auto *ILHS = OrgLHSExprs.begin();
    const auto *IRHS = OrgRHSExprs.begin();
    for (const Expr *E : OrgReductionOps) {
      emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                  RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // Filter out shared reduction variables based on IsPrivateVarReduction flag.
  // Only keep entries where the corresponding variable is not private.
  SmallVector<const Expr *> FilteredPrivates, FilteredLHSExprs,
      FilteredRHSExprs, FilteredReductionOps;
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (!Options.IsPrivateVarReduction[I]) {
      FilteredPrivates.emplace_back(Args: OrgPrivates[I]);
      FilteredLHSExprs.emplace_back(Args: OrgLHSExprs[I]);
      FilteredRHSExprs.emplace_back(Args: OrgRHSExprs[I]);
      FilteredReductionOps.emplace_back(Args: OrgReductionOps[I]);
    }
  }
  // Wrap filtered vectors in ArrayRef for downstream shared reduction
  // processing.
  ArrayRef<const Expr *> Privates = FilteredPrivates;
  ArrayRef<const Expr *> LHSExprs = FilteredLHSExprs;
  ArrayRef<const Expr *> RHSExprs = FilteredRHSExprs;
  ArrayRef<const Expr *> ReductionOps = FilteredReductionOps;

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      EltTy: C.VoidPtrTy, ArySize: ArraySize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(T: ReductionArrayTy, Name: ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
    CGF.Builder.CreateStore(
        Val: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: CGF.EmitLValue(E: RHSExprs[I]).getPointer(CGF), DestTy: CGF.VoidPtrTy),
        Addr: Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size. For VLAs the runtime cannot know the element count,
      // so it is smuggled through the RedList as an extra pointer-sized slot.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(Addr: ReductionList, Index: Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          V: CGF.getVLASize(
              vla: CGF.getContext().getAsVariableArrayType(T: (*IPriv)->getType()))
              .NumElts,
          DestTy: CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(Val: CGF.Builder.CreateIntToPtr(V: Size, DestTy: CGF.VoidPtrTy),
                              Addr: Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      ReducerName: CGF.CurFn->getName(), Loc, ArgsElemType: CGF.ConvertTypeForMem(T: ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName(Parts: {"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(CriticalName: Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, Flags: OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(Ty: ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      V: ReductionList.getPointer(), DestTy: CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(C: RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(),
          FnID: WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      args: Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(name: ".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(V: Res, Dest: DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(name: ".omp.reduction.case1");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 1), Dest: Case1BB);
  CGF.EmitBlock(BB: Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  // In case 1 the runtime serializes threads, so the combiners can be emitted
  // non-atomically.
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, ReductionOp: E, PrivateRef: *IPriv, LHS: cast<DeclRefExpr>(Val: *ILHS),
                                     RHS: cast<DeclRefExpr>(Val: *IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(Block: DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(name: ".omp.reduction.case2");
  SwInst->addCase(OnVal: CGF.Builder.getInt32(C: 2), Dest: Case2BB);
  CGF.EmitBlock(BB: Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      // Decompose 'x = <update>' into its parts. Note: this BinaryOperator
      // local intentionally shadows the BO kind declared just above.
      if (const auto *BO = dyn_cast<BinaryOperator>(Val: E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                Val: RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(Val: RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(E: XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(E: EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              AO: llvm::AtomicOrdering::Monotonic, Loc,
              CommonGen: [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                // Fallback path used when a single atomic instruction is not
                // possible: snapshot X into a temporary, privatize VD to it,
                // and re-evaluate the update expression.
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(T: VD->getType());
                CGF.emitOMPSimpleStore(
                    LVal: CGF.MakeAddrLValue(Addr: LHSTemp, T: VD->getType()), RVal: XRValue,
                    RValTy: VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(LocalVD: VD, Addr: LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(E: UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar: VD, RHSVar,
                                    RedOpGen: AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName(Parts: {"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, CriticalName: Name,
              CriticalOpGen: [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, ReductionOp: E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, Type: (*IPriv)->getType(), LHSVar, RHSVar,
                                    RedOpGen: CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(Block: DefaultBB);
  CGF.EmitBlock(BB: DefaultBB, /*IsFinished=*/true);
  assert(OrgLHSExprs.size() == OrgPrivates.size() &&
         "PrivateVarReduction: Privates size mismatch");
  assert(OrgLHSExprs.size() == OrgReductionOps.size() &&
         "PrivateVarReduction: ReductionOps size mismatch");
  // Finally, handle the entries that were filtered out above: each
  // private-variable reduction is combined via its own protocol.
  for (unsigned I : llvm::seq<unsigned>(
           Size: std::min(a: OrgReductionOps.size(), b: OrgLHSExprs.size()))) {
    if (Options.IsPrivateVarReduction[I])
      emitPrivateReduction(CGF, Loc, Privates: OrgPrivates[I], LHSExprs: OrgLHSExprs[I],
                           RHSExprs: OrgRHSExprs[I], ReductionOps: OrgReductionOps[I]);
  }
}
5481
5482/// Generates unique name for artificial threadprivate variables.
5483/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
5484static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
5485 const Expr *Ref) {
5486 SmallString<256> Buffer;
5487 llvm::raw_svector_ostream Out(Buffer);
5488 const clang::DeclRefExpr *DE;
5489 const VarDecl *D = ::getBaseDecl(Ref, DE);
5490 if (!D)
5491 D = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Ref)->getDecl());
5492 D = D->getCanonicalDecl();
5493 std::string Name = CGM.getOpenMPRuntime().getName(
5494 Parts: {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(GD: D)});
5495 Out << Prefix << Name << "_"
5496 << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
5497 return std::string(Out.str());
5498}
5499
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  // Both parameters are restrict-qualified void pointers.
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  Args.emplace_back(Args: &ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_init", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  // %arg points at the private copy to be initialized.
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param).withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
      PtrTy: C.getPointerType(T: PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer)
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(VD: &ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        Ptr: SharedAddr,
        PtrTy: CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, SharedAddr: OrigAddr,
                         DefaultInit: [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
5564
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  // The LHS/RHS placeholder decls from the reduction clause get remapped to
  // the function's two pointer arguments below.
  const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(Args: &ParamInOut);
  Args.emplace_back(Args: &ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_comb", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LocalVD: LHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamInOut)
              .withElementType(ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      LocalVD: RHSVD,
      // Pull out the pointer to the variable.
      Addr: CGF.EmitLoadOfPointer(
          Ptr: CGF.GetAddrOfLocalVar(VD: &ParamIn).withElementType(
              ElemTy: CGF.Builder.getPtrTy(AddrSpace: 0)),
          PtrTy: C.getPointerType(T: RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, LHS: cast<DeclRefExpr>(Val: LHS),
      RHS: cast<DeclRefExpr>(Val: RHS));
  CGF.FinishFunction();
  return Fn;
}
5640
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
/// Returns nullptr when the reduction item needs no cleanups (no finalizer
/// is emitted at all in that case).
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  // Nothing to destroy -> no finalizer function needed.
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(Args: &Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(resultType: C.VoidTy, args: Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(Info: FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(Parts: {"red_fini", ""});
  auto *Fn = llvm::Function::Create(Ty: FnTy, Linkage: llvm::GlobalValue::InternalLinkage,
                                    N: Name, M: &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GD: GlobalDecl(), F: Fn, FI: FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GD: GlobalDecl(), RetTy: C.VoidTy, Fn, FnInfo, Args, Loc, StartLoc: Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      Ptr: CGF.GetAddrOfLocalVar(VD: &Param), PtrTy: C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(Addr: SizeAddr, /*Volatile=*/false,
                                Ty: CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(EndLoc: Loc);
  return Fn;
}
5688
/// Emits initialization of task reductions: fills a local
/// kmp_taskred_input_t array (one record per reduction item, carrying the
/// shared/orig addresses, size and the init/fini/comb helper functions) and
/// calls __kmpc_taskred_init or, for reductions with a task modifier,
/// __kmpc_taskred_modifier_init. Returns the taskgroup/reduction handle
/// produced by the runtime, or nullptr when there is nothing to emit.
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord(Name: "kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, DC: RD, FieldTy: C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, DC: RD, FieldTy: C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(Decl: RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(EltTy: RDType, ArySize: ArraySize, SizeExpr: nullptr,
                             ASM: ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(T: ArrayRDType, Name: ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  // Fill one kmp_taskred_input_t record per reduction item.
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(Ty: CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(Ty: CGM.SizeTy, V: Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        ElemTy: TaskRedInput.getElementType(), Ptr: TaskRedInput.getPointer(), IdxList: Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        Name: ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(V: GEP, T: RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SharedFD);
    RCG.emitSharedOrigLValue(CGF, N: Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Shared, lvalue: SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(N: Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(value: Orig, lvalue: OrigLVal);
    RCG.emitAggregateType(CGF, N: Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(args&: SizeValInChars, args&: SizeVal) = RCG.getSizes(N: Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(V: SizeValInChars, DestTy: CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: SizeFD);
    CGF.EmitStoreOfScalar(value: SizeValInChars, lvalue: SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, N: Cnt);
    CGF.EmitStoreOfScalar(value: InitAddr, lvalue: InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, N: Cnt);
    // A null fini pointer tells the runtime no finalization is needed.
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(T: CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(value: FiniAddr, lvalue: FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, N: Cnt, ReductionOp: Data.ReductionOps[Cnt], LHS: LHSExprs[Cnt],
        RHS: RHSExprs[Cnt], PrivateRef: Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(value: CombAddr, lvalue: CombLVal);
    // ElemLVal.flags = 0;
    // Flag value 1 requests delayed creation (VLA/array-section items).
    LValue FlagsLVal = CGF.EmitLValueForField(Base: ElemLVal, Field: FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          value: llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/1, /*isSigned=*/IsSigned: true),
          lvalue: FlagsLVal);
    } else
      CGF.EmitNullInitialization(DestPtr: FlagsLVal.getAddress(), Ty: FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                  DestTy: CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/IsSigned: true),
        llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: TaskRedInput.getPointer(), DestTy: CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(
            M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_modifier_init),
        args: Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc), DestTy: CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(Ty: CGM.IntTy, V: Size, /*isSigned=*/IsSigned: true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: TaskRedInput.getPointer(),
                                                      DestTy: CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                                 M&: CGM.getModule(), FnID: OMPRTL___kmpc_taskred_init),
                             args: Args);
}
5813
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc,
  // int gtid, int is_ws);
  // (The previous comment incorrectly named __kmpc_taskred_modifier_init;
  // this function emits the matching *fini* call.)
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                DestTy: CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(Ty: CGM.IntTy,
                                                V: IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/IsSigned: true)};
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(
          M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_modifier_fini),
      args: Args);
}
5831
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr): the size must be stashed in a threadprivate
  // so the generated red_init/red_comb/red_fini functions can load it back
  // (the runtime has no way to pass VLA/array-section sizes to them).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(V: Sizes.second, DestTy: CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, VarType: CGM.getContext().getSizeType(),
        Name: generateUniqueName(CGM, Prefix: "reduction_size", Ref: RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(Val: SizeVal, Addr: SizeAddr, /*IsVolatile=*/false);
  }
}
5848
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d); returns the address of the thread-specific copy of the shared
  // reduction item 'd' within taskgroup 'tg'.
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(V: getThreadID(CGF, Loc),
                                                   DestTy: CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             V: SharedLVal.getPointer(CGF), DestTy: CGM.VoidPtrTy)};
  // Wrap the returned void* as an i8 Address, reusing the alignment of the
  // shared item's lvalue.
  return Address(
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_task_reduction_get_th_data),
          args: Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
5868
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  // Prefer the OpenMPIRBuilder path when enabled, but only for the
  // dependence-free form it currently supports.
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(Loc: CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    // Materialize the depend clause info (count + array of dep records).
    std::tie(args&: NumOfElements, args&: DependenciesArray) =
        emitDependClause(CGF, Dependencies: Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      // No noalias dependence list is emitted: ndeps_noalias = 0, list = null.
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(C: 0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M, FnID: OMPRTL___kmpc_omp_taskwait_deps_51),
                          args: DepWaitTaskArgs);

    } else {

      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_omp_taskwait),
          args: Args);
    }
  }

  // Inside an untied-task region the taskwait is a scheduling point; emit the
  // corresponding switch resume point.
  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
5921
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  // Emit the region body inline (no outlining). The last flag suppresses the
  // untied-task switch emission for critical/master/masked regions.
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
5934
namespace {
/// Cancellation-kind codes passed as the kmp_int32 'cncl_kind' argument to
/// the __kmpc_cancel / __kmpc_cancellationpoint runtime entry points. The
/// numeric values are part of the runtime ABI and must not change.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
5944
5945static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
5946 RTCancelKind CancelKind = CancelNoreq;
5947 if (CancelRegion == OMPD_parallel)
5948 CancelKind = CancelParallel;
5949 else if (CancelRegion == OMPD_for)
5950 CancelKind = CancelLoop;
5951 else if (CancelRegion == OMPD_sections)
5952 CancelKind = CancelSections;
5953 else {
5954 assert(CancelRegion == OMPD_taskgroup);
5955 CancelKind = CancelTaskgroup;
5956 }
5957 return CancelKind;
5958}
5959
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(
              M&: CGM.getModule(), FnID: OMPRTL___kmpc_cancellationpoint),
          args: Args);
      // A non-zero result means cancellation was requested; emit:
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; branch through any pending cleanups to the
      // construct's cancellation destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    }
  }
}
5999
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo)) {
    // The cancel itself, emitted either unconditionally or under the 'if'
    // clause condition (see below).
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(C: getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          callee: OMPBuilder.getOrCreateRuntimeFunction(M, FnID: OMPRTL___kmpc_cancel), args: Args);
      // A non-zero result means cancellation was activated; emit:
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(name: ".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Result);
      CGF.Builder.CreateCondBr(Cond: Cmp, True: ExitBB, False: ContBB);
      CGF.EmitBlock(BB: ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, Kind: OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct; branch through any pending cleanups to the
      // construct's cancellation destination.
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(Kind: OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(Dest: CancelDest);
      CGF.EmitBlock(BB: ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      // 'if' clause present: cancel only when the condition is true.
      emitIfClause(CGF, Cond: IfCond, ThenGen,
                   ElseGen: [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
6045
namespace {
/// Pre/post action for uses_allocators support: initializes each
/// (allocator, traits) pair on entry to the target region and destroys the
/// allocators again on exit.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  /// (allocator expr, allocator-traits expr) pairs collected from the
  /// uses_allocators clauses; not owned.
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  /// Emit __kmpc_init_allocator-based setup for every registered allocator.
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, Allocator: AllocatorData.first, AllocatorTraits: AllocatorData.second);
    }
  }
  /// Emit __kmpc_destroy_allocator-based teardown for every registered
  /// allocator.
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        Allocator: AllocatorData.first);
    }
  }
};
} // namespace
6073
6074void CGOpenMPRuntime::emitTargetOutlinedFunction(
6075 const OMPExecutableDirective &D, StringRef ParentName,
6076 llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
6077 bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
6078 assert(!ParentName.empty() && "Invalid target entry parent name!");
6079 HasEmittedTargetRegion = true;
6080 SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
6081 for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
6082 for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
6083 const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
6084 if (!D.AllocatorTraits)
6085 continue;
6086 Allocators.emplace_back(Args: D.Allocator, Args: D.AllocatorTraits);
6087 }
6088 }
6089 OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
6090 CodeGen.setAction(UsesAllocatorAction);
6091 emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
6092 IsOffloadEntry, CodeGen);
6093}
6094
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  // Build omp_allocator_handle_t __kmpc_init_allocator(int gtid,
  // omp_memspace_handle_t, int ntraits, omp_alloctrait_t traits[]) and store
  // the resulting handle into the allocator variable.
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy);
  // Number of traits = number of elements of the traits array type.
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      Ty: CGF.IntTy, V: cast<ConstantArrayType>(
                       Val: AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                       ->getSize()
                       .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(E: AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr: AllocatorTraitsLVal.getAddress(), Ty: CGF.VoidPtrPtrTy, ElementTy: CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, T: CGF.getContext().VoidPtrTy,
                                           BaseInfo: AllocatorTraitsLVal.getBaseInfo(),
                                           TBAAInfo: AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
                              M&: CGM.getModule(), FnID: OMPRTL___kmpc_init_allocator),
                          args: {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator. First materialize the allocator variable itself (it
  // is a private automatic variable inside the region).
  CGF.EmitAutoVarAlloca(var: *cast<VarDecl>(
      Val: cast<DeclRefExpr>(Val: Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  // Convert the returned void* handle to the allocator's declared type.
  AllocatorVal =
      CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: CGF.getContext().VoidPtrTy,
                               DstTy: Allocator->getType(), Loc: Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(value: AllocatorVal, lvalue: AllocatorLVal);
}
6128
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  // Build void __kmpc_destroy_allocator(int gtid, omp_allocator_handle_t al)
  // for an allocator previously set up by emitUsesAllocatorsInit.
  llvm::Value *ThreadId = getThreadID(CGF, Loc: Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(V: ThreadId, DestTy: CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(E: Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(lvalue: AllocatorLVal, Loc: Allocator->getExprLoc());
  // Convert the handle back to void* as expected by the runtime.
  AllocatorVal = CGF.EmitScalarConversion(Src: AllocatorVal, SrcTy: Allocator->getType(),
                                          DstTy: CGF.getContext().VoidPtrTy,
                                          Loc: Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                            FnID: OMPRTL___kmpc_destroy_allocator),
      args: {ThreadId, AllocatorVal});
}
6144
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
  assert(Attrs.MaxTeams.size() == 1 && Attrs.MaxThreads.size() == 1 &&
         "invalid default attrs structure");
  int32_t &MaxTeamsVal = Attrs.MaxTeams.front();
  int32_t &MaxThreadsVal = Attrs.MaxThreads.front();

  // Derive team/thread bounds from the directive's clauses first.
  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: Attrs.MinTeams, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, UpperBound&: MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  // Then tighten them with any ompx_attribute clause that carries CUDA launch
  // bounds or AMDGPU flat work-group size attributes.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      // -1 means "no bound specified by this attribute".
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(Val: A))
        CGM.handleCUDALaunchBoundsAttr(F: nullptr, A: Attr, MaxThreadsVal: &AttrMaxThreadsVal,
                                       MinBlocksVal: &AttrMinBlocksVal, MaxClusterRankVal: &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(Val: A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            F: nullptr, A: Attr, /*ReqdWGS=*/nullptr, MinThreadsVal: &AttrMinThreadsVal,
            MaxThreadsVal: &AttrMaxThreadsVal);
      else
        continue;

      // Minimums grow to the attribute's value; maximums shrink (or are set
      // if previously unbounded, i.e. <= 0).
      Attrs.MinThreads = std::max(a: Attrs.MinThreads, b: AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(a: MaxThreadsVal, b: AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      Attrs.MinTeams = std::max(a: Attrs.MinTeams, b: AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(a: MaxTeamsVal, b: AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
6183
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  // Entry info (parent name + file/line) identifies this target region for
  // the offloading entry table.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, BeginLoc: D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  // Callback invoked by the OpenMPIRBuilder with the final kernel name; it
  // emits the captured statement of the 'target' region as the kernel body.
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(S: CS, Loc: D.getBeginLoc());
      };

  cantFail(Err: OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateFunctionCallback&: GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID));

  // OutlinedFn may be null, e.g. when the region was not actually emitted.
  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(D: nullptr, GV: OutlinedFn, M&: CGM);

  // Propagate AMDGPU waves-per-eu hints from ompx_attribute clauses onto the
  // outlined kernel.
  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(Val: A))
        CGM.handleAMDGPUWavesPerEUAttr(F: OutlinedFn, A: Attr);
    }
  }
}
6218
6219/// Checks if the expression is constant or does not have non-trivial function
6220/// calls.
6221static bool isTrivial(ASTContext &Ctx, const Expr * E) {
6222 // We can skip constant expressions.
6223 // We can skip expressions with trivial calls or simple expressions.
6224 return (E->isEvaluatable(Ctx, AllowSideEffects: Expr::SE_AllowUndefinedBehavior) ||
6225 !E->hasNonTrivialCall(Ctx)) &&
6226 !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
6227}
6228
// Walks into (possibly nested) compound statements and returns the single
// "interesting" child statement, or nullptr if there is more than one.
// Trivial expressions, no-op statements and side-effect-free declarations are
// ignored during the search.
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Val: Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(Val: S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(Val: S) || isa<NullStmt>(Val: S) || isa<OMPFlushDirective>(Val: S) ||
          isa<OMPBarrierDirective>(Val: S) || isa<OMPTaskyieldDirective>(Val: S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(Val: S)) {
        // Ignore the DeclStmt when every declaration in it is harmless:
        // types, pragmas, OpenMP metadata decls, and variables that are
        // global or never used.
        if (llvm::all_of(Range: DS->decls(), P: [](const Decl *D) {
              if (isa<EmptyDecl>(Val: D) || isa<DeclContext>(Val: D) ||
                  isa<TypeDecl>(Val: D) || isa<PragmaCommentDecl>(Val: D) ||
                  isa<PragmaDetectMismatchDecl>(Val: D) || isa<UsingDecl>(Val: D) ||
                  isa<UsingDirectiveDecl>(Val: D) ||
                  isa<OMPDeclareReductionDecl>(Val: D) ||
                  isa<OMPThreadPrivateDecl>(Val: D) || isa<OMPAllocateDecl>(Val: D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(Val: D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return nullptr;
      Child = S;
    }
    // Descend into the found child in case it is itself a container.
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
6270
// Returns the num_teams expression (or nullptr) for a target-based directive,
// and fills MinTeamsVal/MaxTeamsVal with constant bounds when they can be
// determined: equal constants when num_teams is a constant, 1 when exactly
// one team is implied, 0 when the count is unknown, and -1 (plain 'target'
// with unknown nesting) to signal that no teams region may be needed.
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    // For a plain 'target', inspect the (single) nested directive, if any.
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(Ctx&: CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
      if (isOpenMPTeamsDirective(DKind: NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        // Nested teams without num_teams: count unknown.
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      // A nested non-teams directive runs with a single team.
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    // Combined target+teams forms: the num_teams clause (if any) sits on the
    // directive itself.
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(Ctx: CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(Ctx: CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    // These forms have no teams construct: exactly one team.
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel:
  case OMPD_for:
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_for_simd:
  case OMPD_parallel_for_simd:
  case OMPD_cancel:
  case OMPD_cancellation_point:
  case OMPD_ordered:
  case OMPD_threadprivate:
  case OMPD_allocate:
  case OMPD_task:
  case OMPD_simd:
  case OMPD_tile:
  case OMPD_unroll:
  case OMPD_sections:
  case OMPD_section:
  case OMPD_single:
  case OMPD_master:
  case OMPD_critical:
  case OMPD_taskyield:
  case OMPD_barrier:
  case OMPD_taskwait:
  case OMPD_taskgroup:
  case OMPD_atomic:
  case OMPD_flush:
  case OMPD_depobj:
  case OMPD_scan:
  case OMPD_teams:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_requires:
  case OMPD_metadirective:
  case OMPD_unknown:
    // Non-target directives: excluded by the assert above, unreachable.
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
6398
// Emits (host side) the i32 number of teams for a target directive: either
// the evaluated num_teams clause expression, or the constant bound computed
// by getNumTeamsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal&: MinNT, MaxTeamsVal&: MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      // num_teams came from a nested teams directive: evaluate it in the
      // context of the inner captured statement.
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      // Combined forms: evaluate the clause expression directly.
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(E: NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(V: NumTeamsVal, DestTy: CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  // NOTE(review): the assert text says "Num threads" but this function deals
  // with the number of teams — likely a copy-paste in the message.
  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::get(Ty: CGF.Int32Ty, V: MinNT);
}
6440
/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is perfomed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  // NOTE(review): UpperBoundOnly is not referenced in this body; the callers
  // appear to encode it via E == nullptr instead — confirm whether the
  // parameter can be dropped or should gate something here.
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      // Pick the 'if' clause that applies to 'parallel' (unmodified or with
      // the parallel name modifier).
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
          // Statically false condition: the parallel region is sequentialized,
          // so exactly one thread runs.
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          // Runtime condition: emit the clause's pre-init declarations, then
          // evaluate the condition as a bool for the caller.
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(Val: IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
                CGF.EmitAutoVarCleanups(emission: Emission);
              }
            }
            *CondVal = CGF.EvaluateExprAsBool(E: CondExpr);
          }
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      // NOTE(review): the ternary below looks inverted — when UpperBound is
      // already set (non-zero, including the -1 "unset" sentinel) the constant
      // overwrites it without clamping, and the std::min branch only runs for
      // UpperBound == 0 where it is a no-op. Confirm the intended semantics
      // before changing; the CheckForConstExpr lambda below mirrors this.
      if (NTExpr->isIntegerConstantExpr(Ctx: CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(Ctx: CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(a: UpperBound,
                             b: static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found a upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      // Emit the clause's pre-init declarations so NTExpr is evaluable by the
      // caller, then hand the expression back.
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(Val: NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
            CGF.EmitAutoVarCleanups(emission: Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  // A nested simd directive runs on a single thread.
  if (isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
    UpperBound = 1;
}
6535
6536const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
6537 CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
6538 bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
6539 assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
6540 "Clauses associated with the teams directive expected to be emitted "
6541 "only for the host!");
6542 OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
6543 assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
6544 "Expected target-based executable directive.");
6545
6546 const Expr *NT = nullptr;
6547 const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;
6548
6549 auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
6550 if (E->isIntegerConstantExpr(Ctx: CGF.getContext())) {
6551 if (auto Constant = E->getIntegerConstantExpr(Ctx: CGF.getContext()))
6552 UpperBound = UpperBound ? Constant->getZExtValue()
6553 : std::min(a: UpperBound,
6554 b: int32_t(Constant->getZExtValue()));
6555 }
6556 // If we haven't found a upper bound, remember we saw a thread limiting
6557 // clause.
6558 if (UpperBound == -1)
6559 UpperBound = 0;
6560 if (EPtr)
6561 *EPtr = E;
6562 };
6563
6564 auto ReturnSequential = [&]() {
6565 UpperBound = 1;
6566 return NT;
6567 };
6568
6569 switch (DirectiveKind) {
6570 case OMPD_target: {
6571 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6572 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6573 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6574 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6575 // TODO: The standard is not clear how to resolve two thread limit clauses,
6576 // let's pick the teams one if it's present, otherwise the target one.
6577 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6578 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6579 if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
6580 ThreadLimitClause = TLC;
6581 if (ThreadLimitExpr) {
6582 CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
6583 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
6584 CodeGenFunction::LexicalScope Scope(
6585 CGF,
6586 ThreadLimitClause->getThreadLimit().front()->getSourceRange());
6587 if (const auto *PreInit =
6588 cast_or_null<DeclStmt>(Val: ThreadLimitClause->getPreInitStmt())) {
6589 for (const auto *I : PreInit->decls()) {
6590 if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
6591 CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I));
6592 } else {
6593 CodeGenFunction::AutoVarEmission Emission =
6594 CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I));
6595 CGF.EmitAutoVarCleanups(emission: Emission);
6596 }
6597 }
6598 }
6599 }
6600 }
6601 }
6602 if (ThreadLimitClause)
6603 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6604 ThreadLimitExpr);
6605 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6606 if (isOpenMPTeamsDirective(DKind: Dir->getDirectiveKind()) &&
6607 !isOpenMPDistributeDirective(DKind: Dir->getDirectiveKind())) {
6608 CS = Dir->getInnermostCapturedStmt();
6609 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6610 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6611 Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child);
6612 }
6613 if (Dir && isOpenMPParallelDirective(DKind: Dir->getDirectiveKind())) {
6614 CS = Dir->getInnermostCapturedStmt();
6615 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6616 } else if (Dir && isOpenMPSimdDirective(DKind: Dir->getDirectiveKind()))
6617 return ReturnSequential();
6618 }
6619 return NT;
6620 }
6621 case OMPD_target_teams: {
6622 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6623 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6624 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6625 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6626 ThreadLimitExpr);
6627 }
6628 const CapturedStmt *CS = D.getInnermostCapturedStmt();
6629 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6630 const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
6631 Ctx&: CGF.getContext(), Body: CS->getCapturedStmt());
6632 if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Val: Child)) {
6633 if (Dir->getDirectiveKind() == OMPD_distribute) {
6634 CS = Dir->getInnermostCapturedStmt();
6635 getNumThreads(CGF, CS, E: NTPtr, UpperBound, UpperBoundOnly, CondVal);
6636 }
6637 }
6638 return NT;
6639 }
6640 case OMPD_target_teams_distribute:
6641 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6642 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6643 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6644 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6645 ThreadLimitExpr);
6646 }
6647 getNumThreads(CGF, CS: D.getInnermostCapturedStmt(), E: NTPtr, UpperBound,
6648 UpperBoundOnly, CondVal);
6649 return NT;
6650 case OMPD_target_teams_loop:
6651 case OMPD_target_parallel_loop:
6652 case OMPD_target_parallel:
6653 case OMPD_target_parallel_for:
6654 case OMPD_target_parallel_for_simd:
6655 case OMPD_target_teams_distribute_parallel_for:
6656 case OMPD_target_teams_distribute_parallel_for_simd: {
6657 if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
6658 const OMPIfClause *IfClause = nullptr;
6659 for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
6660 if (C->getNameModifier() == OMPD_unknown ||
6661 C->getNameModifier() == OMPD_parallel) {
6662 IfClause = C;
6663 break;
6664 }
6665 }
6666 if (IfClause) {
6667 const Expr *Cond = IfClause->getCondition();
6668 bool Result;
6669 if (Cond->EvaluateAsBooleanCondition(Result, Ctx: CGF.getContext())) {
6670 if (!Result)
6671 return ReturnSequential();
6672 } else {
6673 CodeGenFunction::RunCleanupsScope Scope(CGF);
6674 *CondVal = CGF.EvaluateExprAsBool(E: Cond);
6675 }
6676 }
6677 }
6678 if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
6679 CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
6680 const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
6681 CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
6682 ThreadLimitExpr);
6683 }
6684 if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
6685 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
6686 const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
6687 CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
6688 return NumThreadsClause->getNumThreads();
6689 }
6690 return NT;
6691 }
6692 case OMPD_target_teams_distribute_simd:
6693 case OMPD_target_simd:
6694 return ReturnSequential();
6695 default:
6696 break;
6697 }
6698 llvm_unreachable("Unsupported directive kind.");
6699}
6700
/// Emit the llvm::Value holding the number of threads to use for the target
/// region of directive \p D, combining the 'num_threads' expression, the
/// 'thread_limit' expression, and any parallel 'if' condition found by
/// getNumThreadsExprForTargetDirective.
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  // Collect the num_threads expression (NT), a constant upper bound if one is
  // known, the parallel if-clause condition, and the thread_limit expression.
  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, CondVal: &CondVal,
      ThreadLimitExpr: &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(E: ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(V: ThreadLimitVal, DestTy: CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    // A known constant bound of one: emit it directly.
    NumThreadsVal = CGF.Builder.getInt32(C: UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(E: NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(V: NumThreadsVal, DestTy: CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression was already emitted
    // above, so clear ThreadLimitVal to avoid applying it a second time.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(C: 0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(C: CondVal, True: NumThreadsVal,
                                             False: CGF.Builder.getInt32(C: 1));
  }

  // If the thread limit and num threads expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        C: CGF.Builder.CreateICmpULT(LHS: ThreadLimitVal, RHS: NumThreadsVal),
        True: ThreadLimitVal, False: NumThreadsVal);
  }

  return NumThreadsVal;
}
6757
6758namespace {
6759LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
6760
6761// Utility to handle information from clauses associated with a given
6762// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
6763// It provides a convenient interface to obtain the information and generate
6764// code for that information.
6765class MappableExprsHandler {
6766public:
6767 /// Get the offset of the OMP_MAP_MEMBER_OF field.
6768 static unsigned getFlagMemberOffset() {
6769 unsigned Offset = 0;
6770 for (uint64_t Remain =
6771 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
6772 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
6773 !(Remain & 1); Remain = Remain >> 1)
6774 Offset++;
6775 return Offset;
6776 }
6777
6778 /// Class that holds debugging information for a data mapping to be passed to
6779 /// the runtime library.
6780 class MappingExprInfo {
6781 /// The variable declaration used for the data mapping.
6782 const ValueDecl *MapDecl = nullptr;
6783 /// The original expression used in the map clause, or null if there is
6784 /// none.
6785 const Expr *MapExpr = nullptr;
6786
6787 public:
6788 MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
6789 : MapDecl(MapDecl), MapExpr(MapExpr) {}
6790
6791 const ValueDecl *getMapDecl() const { return MapDecl; }
6792 const Expr *getMapExpr() const { return MapExpr; }
6793 };
6794
6795 using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
6796 using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6797 using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
6798 using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
6799 using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
6800 using MapNonContiguousArrayTy =
6801 llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
6802 using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
6803 using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
6804 using MapData =
6805 std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
6806 OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>,
6807 bool /*IsImplicit*/, const ValueDecl *, const Expr *>;
6808 using MapDataArrayTy = SmallVector<MapData, 4>;
6809
6810 /// This structure contains combined information generated for mappable
6811 /// clauses, including base pointers, pointers, sizes, map types, user-defined
6812 /// mappers, and non-contiguous information.
6813 struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
6814 MapExprsArrayTy Exprs;
6815 MapValueDeclsArrayTy Mappers;
6816 MapValueDeclsArrayTy DevicePtrDecls;
6817
6818 /// Append arrays in \a CurInfo.
6819 void append(MapCombinedInfoTy &CurInfo) {
6820 Exprs.append(in_start: CurInfo.Exprs.begin(), in_end: CurInfo.Exprs.end());
6821 DevicePtrDecls.append(in_start: CurInfo.DevicePtrDecls.begin(),
6822 in_end: CurInfo.DevicePtrDecls.end());
6823 Mappers.append(in_start: CurInfo.Mappers.begin(), in_end: CurInfo.Mappers.end());
6824 llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
6825 }
6826 };
6827
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    // Combined map info gathered for the struct before the field range is
    // finalized. NOTE(review): appears to be merged into the emitted entries
    // later — confirm at use sites.
    MapCombinedInfoTy PreliminaryMapData;
    // Field index and address of the lowest mapped member.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    // Field index and address of the highest mapped member.
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    // Address of the struct object itself.
    Address Base = Address::invalid();
    // Lower-bound address of the mapped range — presumably the start of the
    // combined entry; confirm against the code that fills this in.
    Address LB = Address::invalid();
    // Set when the range involves an array section (per the field name;
    // the setter is outside this view).
    bool IsArraySection = false;
    // Set when the entire record is mapped rather than a partial field range
    // (per the field name; the setter is outside this view).
    bool HasCompleteRecord = false;
  };
6843
6844private:
  /// Describes one mappable-expression component list together with the map
  /// type, modifiers, and related attributes that apply to it.
  struct MapInfo {
    // Component list (base expression through final expression).
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    // Map type of the clause (to/from/tofrom/alloc/release/delete).
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    // Map-clause modifiers (always, close, present, ompx_hold, ...).
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    // Motion-clause (to/from) modifiers.
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    // Whether the device pointer for this entry must be returned —
    // presumably for use_device_ptr/use_device_addr; confirm at use sites.
    bool ReturnDevicePointer = false;
    // True when the mapping was introduced implicitly rather than written
    // explicitly in source.
    bool IsImplicit = false;
    // User-defined mapper associated with this mapping, if any.
    const ValueDecl *Mapper = nullptr;
    // Original variable-reference expression, if any.
    const Expr *VarRef = nullptr;
    // True when the entry is for a device address rather than a device
    // pointer — TODO confirm against callers.
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
6871
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    // Expression through which the device pointer/address is accessed.
    const Expr *IE = nullptr;
    // Declaration named in the use_device_ptr/use_device_addr clause.
    const ValueDecl *VD = nullptr;
    // True for use_device_addr entries, false for use_device_ptr.
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
6884
6885 /// The target directive from where the mappable clauses were extracted. It
6886 /// is either a executable directive or a user-defined mapper directive.
6887 llvm::PointerUnion<const OMPExecutableDirective *,
6888 const OMPDeclareMapperDecl *>
6889 CurDir;
6890
6891 /// Function the directive is being generated for.
6892 CodeGenFunction &CGF;
6893
6894 /// Set of all first private variables in the current directive.
6895 /// bool data is set to true if the variable is implicitly marked as
6896 /// firstprivate, false otherwise.
6897 llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;
6898
6899 /// Map between device pointer declarations and their expression components.
6900 /// The key value for declarations in 'this' is null.
6901 llvm::DenseMap<
6902 const ValueDecl *,
6903 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6904 DevPointersMap;
6905
6906 /// Map between device addr declarations and their expression components.
6907 /// The key value for declarations in 'this' is null.
6908 llvm::DenseMap<
6909 const ValueDecl *,
6910 SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
6911 HasDevAddrsMap;
6912
6913 /// Map between lambda declarations and their map type.
6914 llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
6915
  /// Compute the number of bytes to be mapped for expression \p E.
  /// Handles OpenMP array-shaping expressions, array sections (whole-array,
  /// single-element, [lb:len], and [lb:] forms), and plain typed expressions.
  /// References are measured by their pointee type.
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression: element size times the
    // product of all dimension extents.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(Val: E)) {
      llvm::Value *Size =
          CGF.getTypeSize(Ty: OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(E: SE);
        // Dimensions can have any integer type; convert to size_t before
        // multiplying.
        Sz = CGF.EmitScalarConversion(Src: Sz, SrcTy: SE->getType(),
                                      DstTy: CGF.getContext().getSizeType(),
                                      Loc: SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(Val: E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            Base: OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(Ty: BaseTy);

      // Determine the element size from the base, which is either a pointer
      // or an array type.
      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(Ty: PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(Val: BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(Ty: ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      // Explicit length: Size = len * elemsize.
      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(E: LenExpr);
        LengthVal = CGF.EmitScalarConversion(Src: LengthVal, SrcTy: LenExpr->getType(),
                                             DstTy: CGF.getContext().getSizeType(),
                                             Loc: LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LHS: LengthVal, RHS: ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(Ty: BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(E: OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(Src: LBVal, SrcTy: OAE->getLowerBound()->getType(),
                                       DstTy: CGF.getContext().getSizeType(),
                                       Loc: OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LHS: LBVal, RHS: ElemSize);
      // Clamp to zero when lb*elemsize exceeds the base size, so the
      // unsigned subtraction below cannot wrap.
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LHS: LengthVal, RHS: LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LHS: LengthVal, RHS: LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          C: Cmp, True: TrueVal, False: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 0));
      return LengthVal;
    }
    return CGF.getTypeSize(Ty: ExprTy);
  }
6990
6991 /// Return the corresponding bits for a given map clause modifier. Add
6992 /// a flag marking the map as a pointer if requested. Add a flag marking the
6993 /// map as the first one of a series of maps that relate to the same map
6994 /// expression.
6995 OpenMPOffloadMappingFlags getMapTypeBits(
6996 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
6997 ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
6998 bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
6999 OpenMPOffloadMappingFlags Bits =
7000 IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
7001 : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
7002 switch (MapType) {
7003 case OMPC_MAP_alloc:
7004 case OMPC_MAP_release:
7005 // alloc and release is the default behavior in the runtime library, i.e.
7006 // if we don't pass any bits alloc/release that is what the runtime is
7007 // going to do. Therefore, we don't need to signal anything for these two
7008 // type modifiers.
7009 break;
7010 case OMPC_MAP_to:
7011 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
7012 break;
7013 case OMPC_MAP_from:
7014 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7015 break;
7016 case OMPC_MAP_tofrom:
7017 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
7018 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7019 break;
7020 case OMPC_MAP_delete:
7021 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
7022 break;
7023 case OMPC_MAP_unknown:
7024 llvm_unreachable("Unexpected map type!");
7025 }
7026 if (AddPtrFlag)
7027 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7028 if (AddIsTargetParamFlag)
7029 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
7030 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_always))
7031 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
7032 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_close))
7033 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
7034 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_present) ||
7035 llvm::is_contained(Range&: MotionModifiers, Element: OMPC_MOTION_MODIFIER_present))
7036 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
7037 if (llvm::is_contained(Range&: MapModifiers, Element: OMPC_MAP_MODIFIER_ompx_hold))
7038 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
7039 if (IsNonContiguous)
7040 Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
7041 return Bits;
7042 }
7043
7044 /// Return true if the provided expression is a final array section. A
7045 /// final array section, is one whose length can't be proved to be one.
7046 bool isFinalArraySectionExpression(const Expr *E) const {
7047 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E);
7048
7049 // It is not an array section and therefore not a unity-size one.
7050 if (!OASE)
7051 return false;
7052
7053 // An array section with no colon always refer to a single element.
7054 if (OASE->getColonLocFirst().isInvalid())
7055 return false;
7056
7057 const Expr *Length = OASE->getLength();
7058
7059 // If we don't have a length we have to check if the array has size 1
7060 // for this dimension. Also, we should always expect a length if the
7061 // base type is pointer.
7062 if (!Length) {
7063 QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
7064 Base: OASE->getBase()->IgnoreParenImpCasts())
7065 .getCanonicalType();
7066 if (const auto *ATy = dyn_cast<ConstantArrayType>(Val: BaseQTy.getTypePtr()))
7067 return ATy->getSExtSize() != 1;
7068 // If we don't have a constant dimension length, we have to consider
7069 // the current section as having any size, so it is not necessarily
7070 // unitary. If it happen to be unity size, that's user fault.
7071 return true;
7072 }
7073
7074 // Check if the length evaluates to 1.
7075 Expr::EvalResult Result;
7076 if (!Length->EvaluateAsInt(Result, Ctx: CGF.getContext()))
7077 return true; // Can have more that size 1.
7078
7079 llvm::APSInt ConstLength = Result.Val.getInt();
7080 return ConstLength.getSExtValue() != 1;
7081 }
7082
7083 /// Generate the base pointers, section pointers, sizes, map type bits, and
7084 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7085 /// map type, map or motion modifiers, and expression components.
7086 /// \a IsFirstComponent should be set to true if the provided set of
7087 /// components is the first associated with a capture.
7088 void generateInfoForComponentList(
7089 OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
7090 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
7091 OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
7092 MapCombinedInfoTy &CombinedInfo,
7093 MapCombinedInfoTy &StructBaseCombinedInfo,
7094 StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
7095 bool IsImplicit, bool GenerateAllInfoForClauses,
7096 const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
7097 const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
7098 ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
7099 OverlappedElements = {},
7100 bool AreBothBasePtrAndPteeMapped = false) const {
7101 // The following summarizes what has to be generated for each map and the
7102 // types below. The generated information is expressed in this order:
7103 // base pointer, section pointer, size, flags
7104 // (to add to the ones that come from the map type and modifier).
7105 //
7106 // double d;
7107 // int i[100];
7108 // float *p;
7109 // int **a = &i;
7110 //
7111 // struct S1 {
7112 // int i;
7113 // float f[50];
7114 // }
7115 // struct S2 {
7116 // int i;
7117 // float f[50];
7118 // S1 s;
7119 // double *p;
7120 // struct S2 *ps;
7121 // int &ref;
7122 // }
7123 // S2 s;
7124 // S2 *ps;
7125 //
7126 // map(d)
7127 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7128 //
7129 // map(i)
7130 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7131 //
7132 // map(i[1:23])
7133 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7134 //
7135 // map(p)
7136 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7137 //
7138 // map(p[1:24])
7139 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7140 // in unified shared memory mode or for local pointers
7141 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7142 //
7143 // map((*a)[0:3])
7144 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7145 // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
7146 //
7147 // map(**a)
7148 // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
7149 // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
7150 //
7151 // map(s)
7152 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7153 //
7154 // map(s.i)
7155 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7156 //
7157 // map(s.s.f)
7158 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7159 //
7160 // map(s.p)
7161 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7162 //
7163 // map(to: s.p[:22])
7164 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7165 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7166 // &(s.p), &(s.p[0]), 22*sizeof(double),
7167 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7168 // (*) alloc space for struct members, only this is a target parameter
7169 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7170 // optimizes this entry out, same in the examples below)
7171 // (***) map the pointee (map: to)
7172 //
7173 // map(to: s.ref)
7174 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7175 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7176 // (*) alloc space for struct members, only this is a target parameter
7177 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7178 // optimizes this entry out, same in the examples below)
7179 // (***) map the pointee (map: to)
7180 //
7181 // map(s.ps)
7182 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7183 //
7184 // map(from: s.ps->s.i)
7185 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7186 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7187 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7188 //
7189 // map(to: s.ps->ps)
7190 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7191 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7192 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7193 //
7194 // map(s.ps->ps->ps)
7195 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7196 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7197 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7198 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7199 //
7200 // map(to: s.ps->ps->s.f[:22])
7201 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7202 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7203 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7204 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7205 //
7206 // map(ps)
7207 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7208 //
7209 // map(ps->i)
7210 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7211 //
7212 // map(ps->s.f)
7213 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7214 //
7215 // map(from: ps->p)
7216 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7217 //
7218 // map(to: ps->p[:22])
7219 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7220 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7221 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7222 //
7223 // map(ps->ps)
7224 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7225 //
7226 // map(from: ps->ps->s.i)
7227 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7228 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7229 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7230 //
7231 // map(from: ps->ps->ps)
7232 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7233 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7234 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7235 //
7236 // map(ps->ps->ps->ps)
7237 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7238 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7239 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7240 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7241 //
7242 // map(to: ps->ps->ps->s.f[:22])
7243 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7244 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7245 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7247 //
7248 // map(to: s.f[:22]) map(from: s.p[:33])
7249 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7250 // sizeof(double*) (**), TARGET_PARAM
7251 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7252 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7253 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7254 // (*) allocate contiguous space needed to fit all mapped members even if
7255 // we allocate space for members not mapped (in this example,
7256 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7257 // them as well because they fall between &s.f[0] and &s.p)
7258 //
7259 // map(from: s.f[:22]) map(to: ps->p[:33])
7260 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7261 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7262 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7263 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7264 // (*) the struct this entry pertains to is the 2nd element in the list of
7265 // arguments, hence MEMBER_OF(2)
7266 //
7267 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7268 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7269 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7270 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7271 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7272 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7273 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7274 // (*) the struct this entry pertains to is the 4th element in the list
7275 // of arguments, hence MEMBER_OF(4)
7276 //
7277 // map(p, p[:100])
7278 // ===> map(p[:100])
7279 // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM
7280
7281 // Track if the map information being generated is the first for a capture.
7282 bool IsCaptureFirstInfo = IsFirstComponentList;
7283 // When the variable is on a declare target link or in a to clause with
7284 // unified memory, a reference is needed to hold the host/device address
7285 // of the variable.
7286 bool RequiresReference = false;
7287
7288 // Scan the components from the base to the complete expression.
7289 auto CI = Components.rbegin();
7290 auto CE = Components.rend();
7291 auto I = CI;
7292
7293 // Track if the map information being generated is the first for a list of
7294 // components.
7295 bool IsExpressionFirstInfo = true;
7296 bool FirstPointerInComplexData = false;
7297 Address BP = Address::invalid();
7298 const Expr *AssocExpr = I->getAssociatedExpression();
7299 const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr);
7300 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7301 const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(Val: AssocExpr);
7302
7303 if (AreBothBasePtrAndPteeMapped && std::next(x: I) == CE)
7304 return;
7305 if (isa<MemberExpr>(Val: AssocExpr)) {
7306 // The base is the 'this' pointer. The content of the pointer is going
7307 // to be the base of the field being mapped.
7308 BP = CGF.LoadCXXThisAddress();
7309 } else if ((AE && isa<CXXThisExpr>(Val: AE->getBase()->IgnoreParenImpCasts())) ||
7310 (OASE &&
7311 isa<CXXThisExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))) {
7312 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7313 } else if (OAShE &&
7314 isa<CXXThisExpr>(Val: OAShE->getBase()->IgnoreParenCasts())) {
7315 BP = Address(
7316 CGF.EmitScalarExpr(E: OAShE->getBase()),
7317 CGF.ConvertTypeForMem(T: OAShE->getBase()->getType()->getPointeeType()),
7318 CGF.getContext().getTypeAlignInChars(T: OAShE->getBase()->getType()));
7319 } else {
7320 // The base is the reference to the variable.
7321 // BP = &Var.
7322 BP = CGF.EmitOMPSharedLValue(E: AssocExpr).getAddress();
7323 if (const auto *VD =
7324 dyn_cast_or_null<VarDecl>(Val: I->getAssociatedDeclaration())) {
7325 if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
7326 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
7327 if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
7328 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
7329 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
7330 CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7331 RequiresReference = true;
7332 BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
7333 }
7334 }
7335 }
7336
7337 // If the variable is a pointer and is being dereferenced (i.e. is not
7338 // the last component), the base has to be the pointer itself, not its
7339 // reference. References are ignored for mapping purposes.
7340 QualType Ty =
7341 I->getAssociatedDeclaration()->getType().getNonReferenceType();
7342 if (Ty->isAnyPointerType() && std::next(x: I) != CE) {
7343 // No need to generate individual map information for the pointer, it
7344 // can be associated with the combined storage if shared memory mode is
7345 // active or the base declaration is not global variable.
7346 const auto *VD = dyn_cast<VarDecl>(Val: I->getAssociatedDeclaration());
7347 if (!AreBothBasePtrAndPteeMapped &&
7348 (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7349 !VD || VD->hasLocalStorage()))
7350 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
7351 else
7352 FirstPointerInComplexData = true;
7353 ++I;
7354 }
7355 }
7356
7357 // Track whether a component of the list should be marked as MEMBER_OF some
7358 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7359 // in a component list should be marked as MEMBER_OF, all subsequent entries
7360 // do not belong to the base struct. E.g.
7361 // struct S2 s;
7362 // s.ps->ps->ps->f[:]
7363 // (1) (2) (3) (4)
7364 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7365 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7366 // is the pointee of ps(2) which is not member of struct s, so it should not
7367 // be marked as such (it is still PTR_AND_OBJ).
7368 // The variable is initialized to false so that PTR_AND_OBJ entries which
7369 // are not struct members are not considered (e.g. array of pointers to
7370 // data).
7371 bool ShouldBeMemberOf = false;
7372
7373 // Variable keeping track of whether or not we have encountered a component
7374 // in the component list which is a member expression. Useful when we have a
7375 // pointer or a final array section, in which case it is the previous
7376 // component in the list which tells us whether we have a member expression.
7377 // E.g. X.f[:]
7378 // While processing the final array section "[:]" it is "f" which tells us
7379 // whether we are dealing with a member of a declared struct.
7380 const MemberExpr *EncounteredME = nullptr;
7381
7382 // Track for the total number of dimension. Start from one for the dummy
7383 // dimension.
7384 uint64_t DimSize = 1;
7385
7386 bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
7387 bool IsPrevMemberReference = false;
7388
7389 bool IsPartialMapped =
7390 !PartialStruct.PreliminaryMapData.BasePointers.empty();
7391
7392 // We need to check if we will be encountering any MEs. If we do not
7393 // encounter any ME expression it means we will be mapping the whole struct.
7394 // In that case we need to skip adding an entry for the struct to the
7395 // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
7396 // list only when generating all info for clauses.
7397 bool IsMappingWholeStruct = true;
7398 if (!GenerateAllInfoForClauses) {
7399 IsMappingWholeStruct = false;
7400 } else {
7401 for (auto TempI = I; TempI != CE; ++TempI) {
7402 const MemberExpr *PossibleME =
7403 dyn_cast<MemberExpr>(Val: TempI->getAssociatedExpression());
7404 if (PossibleME) {
7405 IsMappingWholeStruct = false;
7406 break;
7407 }
7408 }
7409 }
7410
7411 for (; I != CE; ++I) {
7412 // If the current component is member of a struct (parent struct) mark it.
7413 if (!EncounteredME) {
7414 EncounteredME = dyn_cast<MemberExpr>(Val: I->getAssociatedExpression());
7415 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7416 // as MEMBER_OF the parent struct.
7417 if (EncounteredME) {
7418 ShouldBeMemberOf = true;
7419 // Do not emit as complex pointer if this is actually not array-like
7420 // expression.
7421 if (FirstPointerInComplexData) {
7422 QualType Ty = std::prev(x: I)
7423 ->getAssociatedDeclaration()
7424 ->getType()
7425 .getNonReferenceType();
7426 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
7427 FirstPointerInComplexData = false;
7428 }
7429 }
7430 }
7431
7432 auto Next = std::next(x: I);
7433
7434 // We need to generate the addresses and sizes if this is the last
7435 // component, if the component is a pointer or if it is an array section
7436 // whose length can't be proved to be one. If this is a pointer, it
7437 // becomes the base address for the following components.
7438
7439 // A final array section, is one whose length can't be proved to be one.
7440 // If the map item is non-contiguous then we don't treat any array section
7441 // as final array section.
7442 bool IsFinalArraySection =
7443 !IsNonContiguous &&
7444 isFinalArraySectionExpression(E: I->getAssociatedExpression());
7445
7446 // If we have a declaration for the mapping use that, otherwise use
7447 // the base declaration of the map clause.
7448 const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
7449 ? I->getAssociatedDeclaration()
7450 : BaseDecl;
7451 MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
7452 : MapExpr;
7453
7454 // Get information on whether the element is a pointer. Have to do a
7455 // special treatment for array sections given that they are built-in
7456 // types.
7457 const auto *OASE =
7458 dyn_cast<ArraySectionExpr>(Val: I->getAssociatedExpression());
7459 const auto *OAShE =
7460 dyn_cast<OMPArrayShapingExpr>(Val: I->getAssociatedExpression());
7461 const auto *UO = dyn_cast<UnaryOperator>(Val: I->getAssociatedExpression());
7462 const auto *BO = dyn_cast<BinaryOperator>(Val: I->getAssociatedExpression());
7463 bool IsPointer =
7464 OAShE ||
7465 (OASE && ArraySectionExpr::getBaseOriginalType(Base: OASE)
7466 .getCanonicalType()
7467 ->isAnyPointerType()) ||
7468 I->getAssociatedExpression()->getType()->isAnyPointerType();
7469 bool IsMemberReference = isa<MemberExpr>(Val: I->getAssociatedExpression()) &&
7470 MapDecl &&
7471 MapDecl->getType()->isLValueReferenceType();
7472 bool IsNonDerefPointer = IsPointer &&
7473 !(UO && UO->getOpcode() != UO_Deref) && !BO &&
7474 !IsNonContiguous;
7475
7476 if (OASE)
7477 ++DimSize;
7478
7479 if (Next == CE || IsMemberReference || IsNonDerefPointer ||
7480 IsFinalArraySection) {
7481 // If this is not the last component, we expect the pointer to be
7482 // associated with an array expression or member expression.
7483 assert((Next == CE ||
7484 isa<MemberExpr>(Next->getAssociatedExpression()) ||
7485 isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
7486 isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
7487 isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
7488 isa<UnaryOperator>(Next->getAssociatedExpression()) ||
7489 isa<BinaryOperator>(Next->getAssociatedExpression())) &&
7490 "Unexpected expression");
7491
7492 Address LB = Address::invalid();
7493 Address LowestElem = Address::invalid();
7494 auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
7495 const MemberExpr *E) {
7496 const Expr *BaseExpr = E->getBase();
7497 // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
7498 // scalar.
7499 LValue BaseLV;
7500 if (E->isArrow()) {
7501 LValueBaseInfo BaseInfo;
7502 TBAAAccessInfo TBAAInfo;
7503 Address Addr =
7504 CGF.EmitPointerWithAlignment(Addr: BaseExpr, BaseInfo: &BaseInfo, TBAAInfo: &TBAAInfo);
7505 QualType PtrTy = BaseExpr->getType()->getPointeeType();
7506 BaseLV = CGF.MakeAddrLValue(Addr, T: PtrTy, BaseInfo, TBAAInfo);
7507 } else {
7508 BaseLV = CGF.EmitOMPSharedLValue(E: BaseExpr);
7509 }
7510 return BaseLV;
7511 };
7512 if (OAShE) {
7513 LowestElem = LB =
7514 Address(CGF.EmitScalarExpr(E: OAShE->getBase()),
7515 CGF.ConvertTypeForMem(
7516 T: OAShE->getBase()->getType()->getPointeeType()),
7517 CGF.getContext().getTypeAlignInChars(
7518 T: OAShE->getBase()->getType()));
7519 } else if (IsMemberReference) {
7520 const auto *ME = cast<MemberExpr>(Val: I->getAssociatedExpression());
7521 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7522 LowestElem = CGF.EmitLValueForFieldInitialization(
7523 Base: BaseLVal, Field: cast<FieldDecl>(Val: MapDecl))
7524 .getAddress();
7525 LB = CGF.EmitLoadOfReferenceLValue(RefAddr: LowestElem, RefTy: MapDecl->getType())
7526 .getAddress();
7527 } else {
7528 LowestElem = LB =
7529 CGF.EmitOMPSharedLValue(E: I->getAssociatedExpression())
7530 .getAddress();
7531 }
7532
7533 // If this component is a pointer inside the base struct then we don't
7534 // need to create any entry for it - it will be combined with the object
7535 // it is pointing to into a single PTR_AND_OBJ entry.
7536 bool IsMemberPointerOrAddr =
7537 EncounteredME &&
7538 (((IsPointer || ForDeviceAddr) &&
7539 I->getAssociatedExpression() == EncounteredME) ||
7540 (IsPrevMemberReference && !IsPointer) ||
7541 (IsMemberReference && Next != CE &&
7542 !Next->getAssociatedExpression()->getType()->isPointerType()));
7543 if (!OverlappedElements.empty() && Next == CE) {
7544 // Handle base element with the info for overlapped elements.
7545 assert(!PartialStruct.Base.isValid() && "The base element is set.");
7546 assert(!IsPointer &&
7547 "Unexpected base element with the pointer type.");
7548 // Mark the whole struct as the struct that requires allocation on the
7549 // device.
7550 PartialStruct.LowestElem = {0, LowestElem};
7551 CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
7552 T: I->getAssociatedExpression()->getType());
7553 Address HB = CGF.Builder.CreateConstGEP(
7554 Addr: CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
7555 Addr: LowestElem, Ty: CGF.VoidPtrTy, ElementTy: CGF.Int8Ty),
7556 Index: TypeSize.getQuantity() - 1);
7557 PartialStruct.HighestElem = {
7558 std::numeric_limits<decltype(
7559 PartialStruct.HighestElem.first)>::max(),
7560 HB};
7561 PartialStruct.Base = BP;
7562 PartialStruct.LB = LB;
7563 assert(
7564 PartialStruct.PreliminaryMapData.BasePointers.empty() &&
7565 "Overlapped elements must be used only once for the variable.");
7566 std::swap(a&: PartialStruct.PreliminaryMapData, b&: CombinedInfo);
7567 // Emit data for non-overlapped data.
7568 OpenMPOffloadMappingFlags Flags =
7569 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
7570 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7571 /*AddPtrFlag=*/false,
7572 /*AddIsTargetParamFlag=*/false, IsNonContiguous);
7573 llvm::Value *Size = nullptr;
7574 // Do bitcopy of all non-overlapped structure elements.
7575 for (OMPClauseMappableExprCommon::MappableExprComponentListRef
7576 Component : OverlappedElements) {
7577 Address ComponentLB = Address::invalid();
7578 for (const OMPClauseMappableExprCommon::MappableComponent &MC :
7579 Component) {
7580 if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7581 const auto *FD = dyn_cast<FieldDecl>(Val: VD);
7582 if (FD && FD->getType()->isLValueReferenceType()) {
7583 const auto *ME =
7584 cast<MemberExpr>(Val: MC.getAssociatedExpression());
7585 LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7586 ComponentLB =
7587 CGF.EmitLValueForFieldInitialization(Base: BaseLVal, Field: FD)
7588 .getAddress();
7589 } else {
7590 ComponentLB =
7591 CGF.EmitOMPSharedLValue(E: MC.getAssociatedExpression())
7592 .getAddress();
7593 }
7594 llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7595 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7596 Size = CGF.Builder.CreatePtrDiff(ElemTy: CGF.Int8Ty, LHS: ComponentLBPtr,
7597 RHS: LBPtr);
7598 break;
7599 }
7600 }
7601 assert(Size && "Failed to determine structure size");
7602 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
7603 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
7604 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7605 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7606 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
7607 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
7608 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
7609 CombinedInfo.Types.push_back(Elt: Flags);
7610 CombinedInfo.Mappers.push_back(Elt: nullptr);
7611 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
7612 : 1);
7613 LB = CGF.Builder.CreateConstGEP(Addr: ComponentLB, Index: 1);
7614 }
7615 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
7616 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
7617 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7618 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7619 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
7620 llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7621 Size = CGF.Builder.CreatePtrDiff(
7622 ElemTy: CGF.Int8Ty, LHS: CGF.Builder.CreateConstGEP(Addr: HB, Index: 1).emitRawPointer(CGF),
7623 RHS: LBPtr);
7624 CombinedInfo.Sizes.push_back(
7625 Elt: CGF.Builder.CreateIntCast(V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
7626 CombinedInfo.Types.push_back(Elt: Flags);
7627 CombinedInfo.Mappers.push_back(Elt: nullptr);
7628 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
7629 : 1);
7630 break;
7631 }
7632 llvm::Value *Size = getExprTypeSize(E: I->getAssociatedExpression());
7633 // Skip adding an entry in the CurInfo of this combined entry if the
7634 // whole struct is currently being mapped. The struct needs to be added
7635 // in the first position before any data internal to the struct is being
7636 // mapped.
7637 // Skip adding an entry in the CurInfo of this combined entry if the
7638 // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
7639 if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
7640 (Next == CE && MapType != OMPC_MAP_unknown)) {
7641 if (!IsMappingWholeStruct) {
7642 CombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
7643 CombinedInfo.BasePointers.push_back(Elt: BP.emitRawPointer(CGF));
7644 CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7645 CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7646 CombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
7647 CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
7648 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
7649 CombinedInfo.NonContigInfo.Dims.push_back(Elt: IsNonContiguous ? DimSize
7650 : 1);
7651 } else {
7652 StructBaseCombinedInfo.Exprs.emplace_back(Args&: MapDecl, Args&: MapExpr);
7653 StructBaseCombinedInfo.BasePointers.push_back(
7654 Elt: BP.emitRawPointer(CGF));
7655 StructBaseCombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
7656 StructBaseCombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
7657 StructBaseCombinedInfo.Pointers.push_back(Elt: LB.emitRawPointer(CGF));
7658 StructBaseCombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
7659 V: Size, DestTy: CGF.Int64Ty, /*isSigned=*/true));
7660 StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
7661 Elt: IsNonContiguous ? DimSize : 1);
7662 }
7663
7664 // If Mapper is valid, the last component inherits the mapper.
7665 bool HasMapper = Mapper && Next == CE;
7666 if (!IsMappingWholeStruct)
7667 CombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper : nullptr);
7668 else
7669 StructBaseCombinedInfo.Mappers.push_back(Elt: HasMapper ? Mapper
7670 : nullptr);
7671
7672 // We need to add a pointer flag for each map that comes from the
7673 // same expression except for the first one. We also need to signal
7674 // this map is the first one that relates with the current capture
7675 // (there is a set of entries for each capture).
7676 OpenMPOffloadMappingFlags Flags =
7677 getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
7678 AddPtrFlag: !IsExpressionFirstInfo || RequiresReference ||
7679 FirstPointerInComplexData || IsMemberReference,
7680 AddIsTargetParamFlag: AreBothBasePtrAndPteeMapped ||
7681 (IsCaptureFirstInfo && !RequiresReference),
7682 IsNonContiguous);
7683
7684 if (!IsExpressionFirstInfo || IsMemberReference) {
7685 // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
7686 // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
7687 if (IsPointer || (IsMemberReference && Next != CE))
7688 Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
7689 OpenMPOffloadMappingFlags::OMP_MAP_FROM |
7690 OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
7691 OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
7692 OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);
7693
7694 if (ShouldBeMemberOf) {
7695 // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
7696 // should be later updated with the correct value of MEMBER_OF.
7697 Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
7698 // From now on, all subsequent PTR_AND_OBJ entries should not be
7699 // marked as MEMBER_OF.
7700 ShouldBeMemberOf = false;
7701 }
7702 }
7703
7704 if (!IsMappingWholeStruct)
7705 CombinedInfo.Types.push_back(Elt: Flags);
7706 else
7707 StructBaseCombinedInfo.Types.push_back(Elt: Flags);
7708 }
7709
7710 // If we have encountered a member expression so far, keep track of the
7711 // mapped member. If the parent is "*this", then the value declaration
7712 // is nullptr.
7713 if (EncounteredME) {
7714 const auto *FD = cast<FieldDecl>(Val: EncounteredME->getMemberDecl());
7715 unsigned FieldIndex = FD->getFieldIndex();
7716
7717 // Update info about the lowest and highest elements for this struct
7718 if (!PartialStruct.Base.isValid()) {
7719 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7720 if (IsFinalArraySection && OASE) {
7721 Address HB =
7722 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
7723 .getAddress();
7724 PartialStruct.HighestElem = {FieldIndex, HB};
7725 } else {
7726 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7727 }
7728 PartialStruct.Base = BP;
7729 PartialStruct.LB = BP;
7730 } else if (FieldIndex < PartialStruct.LowestElem.first) {
7731 PartialStruct.LowestElem = {FieldIndex, LowestElem};
7732 } else if (FieldIndex > PartialStruct.HighestElem.first) {
7733 if (IsFinalArraySection && OASE) {
7734 Address HB =
7735 CGF.EmitArraySectionExpr(E: OASE, /*IsLowerBound=*/false)
7736 .getAddress();
7737 PartialStruct.HighestElem = {FieldIndex, HB};
7738 } else {
7739 PartialStruct.HighestElem = {FieldIndex, LowestElem};
7740 }
7741 }
7742 }
7743
7744 // Need to emit combined struct for array sections.
7745 if (IsFinalArraySection || IsNonContiguous)
7746 PartialStruct.IsArraySection = true;
7747
7748 // If we have a final array section, we are done with this expression.
7749 if (IsFinalArraySection)
7750 break;
7751
7752 // The pointer becomes the base for the next element.
7753 if (Next != CE)
7754 BP = IsMemberReference ? LowestElem : LB;
7755 if (!IsPartialMapped)
7756 IsExpressionFirstInfo = false;
7757 IsCaptureFirstInfo = false;
7758 FirstPointerInComplexData = false;
7759 IsPrevMemberReference = IsMemberReference;
7760 } else if (FirstPointerInComplexData) {
7761 QualType Ty = Components.rbegin()
7762 ->getAssociatedDeclaration()
7763 ->getType()
7764 .getNonReferenceType();
7765 BP = CGF.EmitLoadOfPointer(Ptr: BP, PtrTy: Ty->castAs<PointerType>());
7766 FirstPointerInComplexData = false;
7767 }
7768 }
7769 // If ran into the whole component - allocate the space for the whole
7770 // record.
7771 if (!EncounteredME)
7772 PartialStruct.HasCompleteRecord = true;
7773
7774 if (!IsNonContiguous)
7775 return;
7776
7777 const ASTContext &Context = CGF.getContext();
7778
7779 // For supporting stride in array section, we need to initialize the first
7780 // dimension size as 1, first offset as 0, and first count as 1
7781 MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 0)};
7782 MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
7783 MapValuesArrayTy CurStrides;
7784 MapValuesArrayTy DimSizes{llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: 1)};
7785 uint64_t ElementTypeSize;
7786
7787 // Collect Size information for each dimension and get the element size as
7788 // the first Stride. For example, for `int arr[10][10]`, the DimSizes
7789 // should be [10, 10] and the first stride is 4 btyes.
7790 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7791 Components) {
7792 const Expr *AssocExpr = Component.getAssociatedExpression();
7793 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7794
7795 if (!OASE)
7796 continue;
7797
7798 QualType Ty = ArraySectionExpr::getBaseOriginalType(Base: OASE->getBase());
7799 auto *CAT = Context.getAsConstantArrayType(T: Ty);
7800 auto *VAT = Context.getAsVariableArrayType(T: Ty);
7801
7802 // We need all the dimension size except for the last dimension.
7803 assert((VAT || CAT || &Component == &*Components.begin()) &&
7804 "Should be either ConstantArray or VariableArray if not the "
7805 "first Component");
7806
7807 // Get element size if CurStrides is empty.
7808 if (CurStrides.empty()) {
7809 const Type *ElementType = nullptr;
7810 if (CAT)
7811 ElementType = CAT->getElementType().getTypePtr();
7812 else if (VAT)
7813 ElementType = VAT->getElementType().getTypePtr();
7814 else
7815 assert(&Component == &*Components.begin() &&
7816 "Only expect pointer (non CAT or VAT) when this is the "
7817 "first Component");
7818 // If ElementType is null, then it means the base is a pointer
7819 // (neither CAT nor VAT) and we'll attempt to get ElementType again
7820 // for next iteration.
7821 if (ElementType) {
7822 // For the case that having pointer as base, we need to remove one
7823 // level of indirection.
7824 if (&Component != &*Components.begin())
7825 ElementType = ElementType->getPointeeOrArrayElementType();
7826 ElementTypeSize =
7827 Context.getTypeSizeInChars(T: ElementType).getQuantity();
7828 CurStrides.push_back(
7829 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: ElementTypeSize));
7830 }
7831 }
7832 // Get dimension value except for the last dimension since we don't need
7833 // it.
7834 if (DimSizes.size() < Components.size() - 1) {
7835 if (CAT)
7836 DimSizes.push_back(
7837 Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: CAT->getZExtSize()));
7838 else if (VAT)
7839 DimSizes.push_back(Elt: CGF.Builder.CreateIntCast(
7840 V: CGF.EmitScalarExpr(E: VAT->getSizeExpr()), DestTy: CGF.Int64Ty,
7841 /*IsSigned=*/isSigned: false));
7842 }
7843 }
7844
7845 // Skip the dummy dimension since we have already have its information.
7846 auto *DI = DimSizes.begin() + 1;
7847 // Product of dimension.
7848 llvm::Value *DimProd =
7849 llvm::ConstantInt::get(Ty: CGF.CGM.Int64Ty, V: ElementTypeSize);
7850
7851 // Collect info for non-contiguous. Notice that offset, count, and stride
7852 // are only meaningful for array-section, so we insert a null for anything
7853 // other than array-section.
7854 // Also, the size of offset, count, and stride are not the same as
7855 // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
7856 // count, and stride are the same as the number of non-contiguous
7857 // declaration in target update to/from clause.
7858 for (const OMPClauseMappableExprCommon::MappableComponent &Component :
7859 Components) {
7860 const Expr *AssocExpr = Component.getAssociatedExpression();
7861
7862 if (const auto *AE = dyn_cast<ArraySubscriptExpr>(Val: AssocExpr)) {
7863 llvm::Value *Offset = CGF.Builder.CreateIntCast(
7864 V: CGF.EmitScalarExpr(E: AE->getIdx()), DestTy: CGF.Int64Ty,
7865 /*isSigned=*/false);
7866 CurOffsets.push_back(Elt: Offset);
7867 CurCounts.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, /*V=*/1));
7868 CurStrides.push_back(Elt: CurStrides.back());
7869 continue;
7870 }
7871
7872 const auto *OASE = dyn_cast<ArraySectionExpr>(Val: AssocExpr);
7873
7874 if (!OASE)
7875 continue;
7876
7877 // Offset
7878 const Expr *OffsetExpr = OASE->getLowerBound();
7879 llvm::Value *Offset = nullptr;
7880 if (!OffsetExpr) {
7881 // If offset is absent, then we just set it to zero.
7882 Offset = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
7883 } else {
7884 Offset = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: OffsetExpr),
7885 DestTy: CGF.Int64Ty,
7886 /*isSigned=*/false);
7887 }
7888 CurOffsets.push_back(Elt: Offset);
7889
7890 // Count
7891 const Expr *CountExpr = OASE->getLength();
7892 llvm::Value *Count = nullptr;
7893 if (!CountExpr) {
7894 // In Clang, once a high dimension is an array section, we construct all
7895 // the lower dimension as array section, however, for case like
7896 // arr[0:2][2], Clang construct the inner dimension as an array section
7897 // but it actually is not in an array section form according to spec.
7898 if (!OASE->getColonLocFirst().isValid() &&
7899 !OASE->getColonLocSecond().isValid()) {
7900 Count = llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 1);
7901 } else {
7902 // OpenMP 5.0, 2.1.5 Array Sections, Description.
7903 // When the length is absent it defaults to ⌈(size −
7904 // lower-bound)/stride⌉, where size is the size of the array
7905 // dimension.
7906 const Expr *StrideExpr = OASE->getStride();
7907 llvm::Value *Stride =
7908 StrideExpr
7909 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
7910 DestTy: CGF.Int64Ty, /*isSigned=*/false)
7911 : nullptr;
7912 if (Stride)
7913 Count = CGF.Builder.CreateUDiv(
7914 LHS: CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset), RHS: Stride);
7915 else
7916 Count = CGF.Builder.CreateNUWSub(LHS: *DI, RHS: Offset);
7917 }
7918 } else {
7919 Count = CGF.EmitScalarExpr(E: CountExpr);
7920 }
7921 Count = CGF.Builder.CreateIntCast(V: Count, DestTy: CGF.Int64Ty, /*isSigned=*/false);
7922 CurCounts.push_back(Elt: Count);
7923
7924 // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
7925 // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
7926 // Offset Count Stride
7927 // D0 0 1 4 (int) <- dummy dimension
7928 // D1 0 2 8 (2 * (1) * 4)
7929 // D2 1 2 20 (1 * (1 * 5) * 4)
7930 // D3 0 2 200 (2 * (1 * 5 * 4) * 4)
7931 const Expr *StrideExpr = OASE->getStride();
7932 llvm::Value *Stride =
7933 StrideExpr
7934 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: StrideExpr),
7935 DestTy: CGF.Int64Ty, /*isSigned=*/false)
7936 : nullptr;
7937 DimProd = CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: *(DI - 1));
7938 if (Stride)
7939 CurStrides.push_back(Elt: CGF.Builder.CreateNUWMul(LHS: DimProd, RHS: Stride));
7940 else
7941 CurStrides.push_back(Elt: DimProd);
7942 if (DI != DimSizes.end())
7943 ++DI;
7944 }
7945
7946 CombinedInfo.NonContigInfo.Offsets.push_back(Elt: CurOffsets);
7947 CombinedInfo.NonContigInfo.Counts.push_back(Elt: CurCounts);
7948 CombinedInfo.NonContigInfo.Strides.push_back(Elt: CurStrides);
7949 }
7950
7951 /// Return the adjusted map modifiers if the declaration a capture refers to
7952 /// appears in a first-private clause. This is expected to be used only with
7953 /// directives that start with 'target'.
7954 OpenMPOffloadMappingFlags
7955 getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
7956 assert(Cap.capturesVariable() && "Expected capture by reference only!");
7957
7958 // A first private variable captured by reference will use only the
7959 // 'private ptr' and 'map to' flag. Return the right flags if the captured
7960 // declaration is known as first-private in this handler.
7961 if (FirstPrivateDecls.count(Val: Cap.getCapturedVar())) {
7962 if (Cap.getCapturedVar()->getType()->isAnyPointerType())
7963 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7964 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
7965 return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
7966 OpenMPOffloadMappingFlags::OMP_MAP_TO;
7967 }
7968 auto I = LambdasMap.find(Val: Cap.getCapturedVar()->getCanonicalDecl());
7969 if (I != LambdasMap.end())
7970 // for map(to: lambda): using user specified map type.
7971 return getMapTypeBits(
7972 MapType: I->getSecond()->getMapType(), MapModifiers: I->getSecond()->getMapTypeModifiers(),
7973 /*MotionModifiers=*/{}, IsImplicit: I->getSecond()->isImplicit(),
7974 /*AddPtrFlag=*/false,
7975 /*AddIsTargetParamFlag=*/false,
7976 /*isNonContiguous=*/IsNonContiguous: false);
7977 return OpenMPOffloadMappingFlags::OMP_MAP_TO |
7978 OpenMPOffloadMappingFlags::OMP_MAP_FROM;
7979 }
7980
7981 void getPlainLayout(const CXXRecordDecl *RD,
7982 llvm::SmallVectorImpl<const FieldDecl *> &Layout,
7983 bool AsBase) const {
7984 const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);
7985
7986 llvm::StructType *St =
7987 AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();
7988
7989 unsigned NumElements = St->getNumElements();
7990 llvm::SmallVector<
7991 llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
7992 RecordLayout(NumElements);
7993
7994 // Fill bases.
7995 for (const auto &I : RD->bases()) {
7996 if (I.isVirtual())
7997 continue;
7998
7999 QualType BaseTy = I.getType();
8000 const auto *Base = BaseTy->getAsCXXRecordDecl();
8001 // Ignore empty bases.
8002 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy) ||
8003 CGF.getContext()
8004 .getASTRecordLayout(D: Base)
8005 .getNonVirtualSize()
8006 .isZero())
8007 continue;
8008
8009 unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(RD: Base);
8010 RecordLayout[FieldIndex] = Base;
8011 }
8012 // Fill in virtual bases.
8013 for (const auto &I : RD->vbases()) {
8014 QualType BaseTy = I.getType();
8015 // Ignore empty bases.
8016 if (isEmptyRecordForLayout(Context: CGF.getContext(), T: BaseTy))
8017 continue;
8018
8019 const auto *Base = BaseTy->getAsCXXRecordDecl();
8020 unsigned FieldIndex = RL.getVirtualBaseIndex(base: Base);
8021 if (RecordLayout[FieldIndex])
8022 continue;
8023 RecordLayout[FieldIndex] = Base;
8024 }
8025 // Fill in all the fields.
8026 assert(!RD->isUnion() && "Unexpected union.");
8027 for (const auto *Field : RD->fields()) {
8028 // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
8029 // will fill in later.)
8030 if (!Field->isBitField() &&
8031 !isEmptyFieldForLayout(Context: CGF.getContext(), FD: Field)) {
8032 unsigned FieldIndex = RL.getLLVMFieldNo(FD: Field);
8033 RecordLayout[FieldIndex] = Field;
8034 }
8035 }
8036 for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
8037 &Data : RecordLayout) {
8038 if (Data.isNull())
8039 continue;
8040 if (const auto *Base = dyn_cast<const CXXRecordDecl *>(Val: Data))
8041 getPlainLayout(RD: Base, Layout, /*AsBase=*/true);
8042 else
8043 Layout.push_back(Elt: cast<const FieldDecl *>(Val: Data));
8044 }
8045 }
8046
8047 /// Generate all the base pointers, section pointers, sizes, map types, and
8048 /// mappers for the extracted mappable expressions (all included in \a
8049 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8050 /// pair of the relevant declaration and index where it occurs is appended to
8051 /// the device pointers info array.
8052 void generateAllInfoForClauses(
8053 ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
8054 llvm::OpenMPIRBuilder &OMPBuilder,
8055 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8056 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8057 // We have to process the component lists that relate with the same
8058 // declaration in a single chunk so that we can generate the map flags
8059 // correctly. Therefore, we organize all lists in a map.
8060 enum MapKind { Present, Allocs, Other, Total };
8061 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8062 SmallVector<SmallVector<MapInfo, 8>, 4>>
8063 Info;
8064
8065 // Helper function to fill the information map for the different supported
8066 // clauses.
8067 auto &&InfoGen =
8068 [&Info, &SkipVarSet](
8069 const ValueDecl *D, MapKind Kind,
8070 OMPClauseMappableExprCommon::MappableExprComponentListRef L,
8071 OpenMPMapClauseKind MapType,
8072 ArrayRef<OpenMPMapModifierKind> MapModifiers,
8073 ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
8074 bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
8075 const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
8076 if (SkipVarSet.contains(V: D))
8077 return;
8078 auto It = Info.try_emplace(Key: D, Args: Total).first;
8079 It->second[Kind].emplace_back(
8080 Args&: L, Args&: MapType, Args&: MapModifiers, Args&: MotionModifiers, Args&: ReturnDevicePointer,
8081 Args&: IsImplicit, Args&: Mapper, Args&: VarRef, Args&: ForDeviceAddr);
8082 };
8083
8084 for (const auto *Cl : Clauses) {
8085 const auto *C = dyn_cast<OMPMapClause>(Val: Cl);
8086 if (!C)
8087 continue;
8088 MapKind Kind = Other;
8089 if (llvm::is_contained(Range: C->getMapTypeModifiers(),
8090 Element: OMPC_MAP_MODIFIER_present))
8091 Kind = Present;
8092 else if (C->getMapType() == OMPC_MAP_alloc)
8093 Kind = Allocs;
8094 const auto *EI = C->getVarRefs().begin();
8095 for (const auto L : C->component_lists()) {
8096 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8097 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), C->getMapType(),
8098 C->getMapTypeModifiers(), {},
8099 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
8100 E);
8101 ++EI;
8102 }
8103 }
8104 for (const auto *Cl : Clauses) {
8105 const auto *C = dyn_cast<OMPToClause>(Val: Cl);
8106 if (!C)
8107 continue;
8108 MapKind Kind = Other;
8109 if (llvm::is_contained(Range: C->getMotionModifiers(),
8110 Element: OMPC_MOTION_MODIFIER_present))
8111 Kind = Present;
8112 const auto *EI = C->getVarRefs().begin();
8113 for (const auto L : C->component_lists()) {
8114 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_to, {},
8115 C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
8116 C->isImplicit(), std::get<2>(t: L), *EI);
8117 ++EI;
8118 }
8119 }
8120 for (const auto *Cl : Clauses) {
8121 const auto *C = dyn_cast<OMPFromClause>(Val: Cl);
8122 if (!C)
8123 continue;
8124 MapKind Kind = Other;
8125 if (llvm::is_contained(Range: C->getMotionModifiers(),
8126 Element: OMPC_MOTION_MODIFIER_present))
8127 Kind = Present;
8128 const auto *EI = C->getVarRefs().begin();
8129 for (const auto L : C->component_lists()) {
8130 InfoGen(std::get<0>(t: L), Kind, std::get<1>(t: L), OMPC_MAP_from, {},
8131 C->getMotionModifiers(),
8132 /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(t: L),
8133 *EI);
8134 ++EI;
8135 }
8136 }
8137
8138 // Look at the use_device_ptr and use_device_addr clauses information and
8139 // mark the existing map entries as such. If there is no map information for
8140 // an entry in the use_device_ptr and use_device_addr list, we create one
8141 // with map type 'alloc' and zero size section. It is the user fault if that
8142 // was not mapped before. If there is no map information and the pointer is
8143 // a struct member, then we defer the emission of that entry until the whole
8144 // struct has been processed.
8145 llvm::MapVector<CanonicalDeclPtr<const Decl>,
8146 SmallVector<DeferredDevicePtrEntryTy, 4>>
8147 DeferredInfo;
8148 MapCombinedInfoTy UseDeviceDataCombinedInfo;
8149
8150 auto &&UseDeviceDataCombinedInfoGen =
8151 [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
8152 CodeGenFunction &CGF, bool IsDevAddr) {
8153 UseDeviceDataCombinedInfo.Exprs.push_back(Elt: VD);
8154 UseDeviceDataCombinedInfo.BasePointers.emplace_back(Args&: Ptr);
8155 UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(Args&: VD);
8156 UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
8157 Args: IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8158 UseDeviceDataCombinedInfo.Pointers.push_back(Elt: Ptr);
8159 UseDeviceDataCombinedInfo.Sizes.push_back(
8160 Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
8161 UseDeviceDataCombinedInfo.Types.push_back(
8162 Elt: OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
8163 UseDeviceDataCombinedInfo.Mappers.push_back(Elt: nullptr);
8164 };
8165
8166 auto &&MapInfoGen =
8167 [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
8168 &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
8169 OMPClauseMappableExprCommon::MappableExprComponentListRef
8170 Components,
8171 bool IsImplicit, bool IsDevAddr) {
8172 // We didn't find any match in our map information - generate a zero
8173 // size array section - if the pointer is a struct member we defer
8174 // this action until the whole struct has been processed.
8175 if (isa<MemberExpr>(Val: IE)) {
8176 // Insert the pointer into Info to be processed by
8177 // generateInfoForComponentList. Because it is a member pointer
8178 // without a pointee, no entry will be generated for it, therefore
8179 // we need to generate one after the whole struct has been
8180 // processed. Nonetheless, generateInfoForComponentList must be
8181 // called to take the pointer into account for the calculation of
8182 // the range of the partial struct.
8183 InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
8184 /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
8185 IsDevAddr);
8186 DeferredInfo[nullptr].emplace_back(Args&: IE, Args&: VD, Args&: IsDevAddr);
8187 } else {
8188 llvm::Value *Ptr;
8189 if (IsDevAddr) {
8190 if (IE->isGLValue())
8191 Ptr = CGF.EmitLValue(E: IE).getPointer(CGF);
8192 else
8193 Ptr = CGF.EmitScalarExpr(E: IE);
8194 } else {
8195 Ptr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: IE), Loc: IE->getExprLoc());
8196 }
8197 UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
8198 }
8199 };
8200
8201 auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
8202 const Expr *IE, bool IsDevAddr) -> bool {
8203 // We potentially have map information for this declaration already.
8204 // Look for the first set of components that refer to it. If found,
8205 // return true.
8206 // If the first component is a member expression, we have to look into
8207 // 'this', which maps to null in the map of map information. Otherwise
8208 // look directly for the information.
8209 auto It = Info.find(Key: isa<MemberExpr>(Val: IE) ? nullptr : VD);
8210 if (It != Info.end()) {
8211 bool Found = false;
8212 for (auto &Data : It->second) {
8213 auto *CI = llvm::find_if(Range&: Data, P: [VD](const MapInfo &MI) {
8214 return MI.Components.back().getAssociatedDeclaration() == VD;
8215 });
8216 // If we found a map entry, signal that the pointer has to be
8217 // returned and move on to the next declaration. Exclude cases where
8218 // the base pointer is mapped as array subscript, array section or
8219 // array shaping. The base address is passed as a pointer to base in
8220 // this case and cannot be used as a base for use_device_ptr list
8221 // item.
8222 if (CI != Data.end()) {
8223 if (IsDevAddr) {
8224 CI->ForDeviceAddr = IsDevAddr;
8225 CI->ReturnDevicePointer = true;
8226 Found = true;
8227 break;
8228 } else {
8229 auto PrevCI = std::next(x: CI->Components.rbegin());
8230 const auto *VarD = dyn_cast<VarDecl>(Val: VD);
8231 if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
8232 isa<MemberExpr>(Val: IE) ||
8233 !VD->getType().getNonReferenceType()->isPointerType() ||
8234 PrevCI == CI->Components.rend() ||
8235 isa<MemberExpr>(Val: PrevCI->getAssociatedExpression()) || !VarD ||
8236 VarD->hasLocalStorage()) {
8237 CI->ForDeviceAddr = IsDevAddr;
8238 CI->ReturnDevicePointer = true;
8239 Found = true;
8240 break;
8241 }
8242 }
8243 }
8244 }
8245 return Found;
8246 }
8247 return false;
8248 };
8249
8250 // Look at the use_device_ptr clause information and mark the existing map
8251 // entries as such. If there is no map information for an entry in the
8252 // use_device_ptr list, we create one with map type 'alloc' and zero size
8253 // section. It is the user fault if that was not mapped before. If there is
8254 // no map information and the pointer is a struct member, then we defer the
8255 // emission of that entry until the whole struct has been processed.
8256 for (const auto *Cl : Clauses) {
8257 const auto *C = dyn_cast<OMPUseDevicePtrClause>(Val: Cl);
8258 if (!C)
8259 continue;
8260 for (const auto L : C->component_lists()) {
8261 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8262 std::get<1>(t: L);
8263 assert(!Components.empty() &&
8264 "Not expecting empty list of components!");
8265 const ValueDecl *VD = Components.back().getAssociatedDeclaration();
8266 VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
8267 const Expr *IE = Components.back().getAssociatedExpression();
8268 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
8269 continue;
8270 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8271 /*IsDevAddr=*/false);
8272 }
8273 }
8274
8275 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
8276 for (const auto *Cl : Clauses) {
8277 const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Val: Cl);
8278 if (!C)
8279 continue;
8280 for (const auto L : C->component_lists()) {
8281 OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
8282 std::get<1>(t: L);
8283 assert(!std::get<1>(L).empty() &&
8284 "Not expecting empty list of components!");
8285 const ValueDecl *VD = std::get<1>(t: L).back().getAssociatedDeclaration();
8286 if (!Processed.insert(V: VD).second)
8287 continue;
8288 VD = cast<ValueDecl>(Val: VD->getCanonicalDecl());
8289 const Expr *IE = std::get<1>(t: L).back().getAssociatedExpression();
8290 if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
8291 continue;
8292 MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
8293 /*IsDevAddr=*/true);
8294 }
8295 }
8296
8297 for (const auto &Data : Info) {
8298 StructRangeInfoTy PartialStruct;
8299 // Current struct information:
8300 MapCombinedInfoTy CurInfo;
8301 // Current struct base information:
8302 MapCombinedInfoTy StructBaseCurInfo;
8303 const Decl *D = Data.first;
8304 const ValueDecl *VD = cast_or_null<ValueDecl>(Val: D);
8305 bool HasMapBasePtr = false;
8306 bool HasMapArraySec = false;
8307 if (VD && VD->getType()->isAnyPointerType()) {
8308 for (const auto &M : Data.second) {
8309 HasMapBasePtr = any_of(Range: M, P: [](const MapInfo &L) {
8310 return isa_and_present<DeclRefExpr>(Val: L.VarRef);
8311 });
8312 HasMapArraySec = any_of(Range: M, P: [](const MapInfo &L) {
8313 return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
8314 Val: L.VarRef);
8315 });
8316 if (HasMapBasePtr && HasMapArraySec)
8317 break;
8318 }
8319 }
8320 for (const auto &M : Data.second) {
8321 for (const MapInfo &L : M) {
8322 assert(!L.Components.empty() &&
8323 "Not expecting declaration with no component lists.");
8324
8325 // Remember the current base pointer index.
8326 unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
8327 unsigned StructBasePointersIdx =
8328 StructBaseCurInfo.BasePointers.size();
8329 CurInfo.NonContigInfo.IsNonContiguous =
8330 L.Components.back().isNonContiguous();
8331 generateInfoForComponentList(
8332 MapType: L.MapType, MapModifiers: L.MapModifiers, MotionModifiers: L.MotionModifiers, Components: L.Components,
8333 CombinedInfo&: CurInfo, StructBaseCombinedInfo&: StructBaseCurInfo, PartialStruct,
8334 /*IsFirstComponentList=*/false, IsImplicit: L.IsImplicit,
8335 /*GenerateAllInfoForClauses*/ true, Mapper: L.Mapper, ForDeviceAddr: L.ForDeviceAddr, BaseDecl: VD,
8336 MapExpr: L.VarRef, /*OverlappedElements*/ {},
8337 AreBothBasePtrAndPteeMapped: HasMapBasePtr && HasMapArraySec);
8338
8339 // If this entry relates to a device pointer, set the relevant
8340 // declaration and add the 'return pointer' flag.
8341 if (L.ReturnDevicePointer) {
8342 // Check whether a value was added to either CurInfo or
8343 // StructBaseCurInfo and error if no value was added to either of
8344 // them:
8345 assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
8346 StructBasePointersIdx <
8347 StructBaseCurInfo.BasePointers.size()) &&
8348 "Unexpected number of mapped base pointers.");
8349
8350 // Choose a base pointer index which is always valid:
8351 const ValueDecl *RelevantVD =
8352 L.Components.back().getAssociatedDeclaration();
8353 assert(RelevantVD &&
8354 "No relevant declaration related with device pointer??");
8355
8356 // If StructBaseCurInfo has been updated this iteration then work on
8357 // the first new entry added to it i.e. make sure that when multiple
8358 // values are added to any of the lists, the first value added is
8359 // being modified by the assignments below (not the last value
8360 // added).
8361 if (StructBasePointersIdx < StructBaseCurInfo.BasePointers.size()) {
8362 StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
8363 RelevantVD;
8364 StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
8365 L.ForDeviceAddr ? DeviceInfoTy::Address
8366 : DeviceInfoTy::Pointer;
8367 StructBaseCurInfo.Types[StructBasePointersIdx] |=
8368 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8369 } else {
8370 CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
8371 CurInfo.DevicePointers[CurrentBasePointersIdx] =
8372 L.ForDeviceAddr ? DeviceInfoTy::Address
8373 : DeviceInfoTy::Pointer;
8374 CurInfo.Types[CurrentBasePointersIdx] |=
8375 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
8376 }
8377 }
8378 }
8379 }
8380
8381 // Append any pending zero-length pointers which are struct members and
8382 // used with use_device_ptr or use_device_addr.
8383 auto CI = DeferredInfo.find(Key: Data.first);
8384 if (CI != DeferredInfo.end()) {
8385 for (const DeferredDevicePtrEntryTy &L : CI->second) {
8386 llvm::Value *BasePtr;
8387 llvm::Value *Ptr;
8388 if (L.ForDeviceAddr) {
8389 if (L.IE->isGLValue())
8390 Ptr = this->CGF.EmitLValue(E: L.IE).getPointer(CGF);
8391 else
8392 Ptr = this->CGF.EmitScalarExpr(E: L.IE);
8393 BasePtr = Ptr;
8394 // Entry is RETURN_PARAM. Also, set the placeholder value
8395 // MEMBER_OF=FFFF so that the entry is later updated with the
8396 // correct value of MEMBER_OF.
8397 CurInfo.Types.push_back(
8398 Elt: OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8399 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8400 } else {
8401 BasePtr = this->CGF.EmitLValue(E: L.IE).getPointer(CGF);
8402 Ptr = this->CGF.EmitLoadOfScalar(lvalue: this->CGF.EmitLValue(E: L.IE),
8403 Loc: L.IE->getExprLoc());
8404 // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
8405 // placeholder value MEMBER_OF=FFFF so that the entry is later
8406 // updated with the correct value of MEMBER_OF.
8407 CurInfo.Types.push_back(
8408 Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8409 OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
8410 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
8411 }
8412 CurInfo.Exprs.push_back(Elt: L.VD);
8413 CurInfo.BasePointers.emplace_back(Args&: BasePtr);
8414 CurInfo.DevicePtrDecls.emplace_back(Args: L.VD);
8415 CurInfo.DevicePointers.emplace_back(
8416 Args: L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
8417 CurInfo.Pointers.push_back(Elt: Ptr);
8418 CurInfo.Sizes.push_back(
8419 Elt: llvm::Constant::getNullValue(Ty: this->CGF.Int64Ty));
8420 CurInfo.Mappers.push_back(Elt: nullptr);
8421 }
8422 }
8423
8424 // Unify entries in one list making sure the struct mapping precedes the
8425 // individual fields:
8426 MapCombinedInfoTy UnionCurInfo;
8427 UnionCurInfo.append(CurInfo&: StructBaseCurInfo);
8428 UnionCurInfo.append(CurInfo);
8429
8430 // If there is an entry in PartialStruct it means we have a struct with
8431 // individual members mapped. Emit an extra combined entry.
8432 if (PartialStruct.Base.isValid()) {
8433 UnionCurInfo.NonContigInfo.Dims.push_back(Elt: 0);
8434 // Emit a combined entry:
8435 emitCombinedEntry(CombinedInfo, CurTypes&: UnionCurInfo.Types, PartialStruct,
8436 /*IsMapThis*/ !VD, OMPBuilder, VD);
8437 }
8438
8439 // We need to append the results of this capture to what we already have.
8440 CombinedInfo.append(CurInfo&: UnionCurInfo);
8441 }
8442 // Append data for use_device_ptr clauses.
8443 CombinedInfo.append(CurInfo&: UseDeviceDataCombinedInfo);
8444 }
8445
8446public:
  /// Constructor for an executable directive. Pre-scans the directive's
  /// clauses once and caches per-declaration information (firstprivates,
  /// device pointers/addresses, mapped lambdas) that is consulted repeatedly
  /// while generating map entries for individual captures.
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information: record each firstprivate
    // variable together with whether the clause was implicit.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            Key: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D)->getDecl()), Args: C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses: either the
    // allocator-traits variable (when present) or the allocator variable
    // itself is treated as an implicit firstprivate.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(Val: D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(Key: cast<VarDecl>(Val: DRE->getDecl()),
                                        /*Implicit=*/Args: true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     Val: cast<DeclRefExpr>(Val: D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(Key: VD, /*Implicit=*/Args: true);
      }
    }
    // Extract device pointer clause information: component lists per
    // declaration from is_device_ptr.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract device addr clause information: component lists per
    // declaration from has_device_addr.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(t&: L)].push_back(Elt: std::get<1>(t&: L));
    // Extract map information: remember declarations of lambda type that are
    // explicitly mapped 'to'; they get special handling later (see the
    // LambdasMap checks in generateInfoForCaptureFromClauseInfo).
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      // Only map(to:) is relevant for the lambda special case.
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(t&: L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(Key: std::get<0>(t&: L), Args&: C);
      }
    }
  }
8491
  /// Constructor for the declare mapper directive. No clause pre-scanning is
  /// needed: a mapper's own map clauses are processed directly by
  /// generateAllInfoForMapper.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
8495
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  ///
  /// \param CombinedInfo Output arrays receiving the single combined entry.
  /// \param CurTypes Map-type flags of the member entries already generated;
  ///        updated in place so the members become MEMBER_OF children of the
  ///        combined entry.
  /// \param PartialStruct Base address and lowest/highest mapped element
  ///        collected while the members were processed.
  /// \param IsMapThis True when the mapped struct is the implicit 'this'.
  /// \param OMPBuilder Used to encode/patch the MEMBER_OF field.
  /// \param VD The mapped declaration, or null for 'this'.
  /// \param OffsetForMemberOfFlag Bias added to the MEMBER_OF position when
  ///        entries for other captures precede this one.
  /// \param NotTargetParams If true, the combined entry is not a TARGET_PARAM
  ///        kernel argument.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         unsigned OffsetForMemberOfFlag = 0,
                         bool NotTargetParams = true) const {
    // A single entry that is not a member of anything and not an array
    // section does not need a combined parent entry.
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    // When the whole record is mapped, both bounds collapse to the record
    // base; the +1 GEP below then spans exactly one record element.
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(Elt: VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
    CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(Val: CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    // 'this' of a derived class: the base-class subobject may lie outside the
    // [lowest, highest] member range, so map the full object instead.
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(Elt: PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty), DestTy: CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Elt: Size);
    } else {
      CombinedInfo.Pointers.push_back(Elt: LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          Ty: HBAddr.getElementType(), Ptr: HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(V: LB, DestTy: CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(V: HAddr, DestTy: CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(ElemTy: CGF.Int8Ty, LHS: CHAddr, RHS: CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(V: Diff, DestTy: CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Elt: Size);
    }
    CombinedInfo.Mappers.push_back(Elt: nullptr);
    // Map type is always TARGET_PARAM, if generate info for captures.
    CombinedInfo.Types.push_back(
        Elt: NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                       : !PartialStruct.PreliminaryMapData.BasePointers.empty()
                             ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
                             : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(Range&: CurTypes, P: [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag = OMPBuilder.getMemberOfFlag(
        Position: OffsetForMemberOfFlag + CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(Flags&: M, MemberOfFlag);
  }
8597
8598 /// Generate all the base pointers, section pointers, sizes, map types, and
8599 /// mappers for the extracted mappable expressions (all included in \a
8600 /// CombinedInfo). Also, for each item that relates with a device pointer, a
8601 /// pair of the relevant declaration and index where it occurs is appended to
8602 /// the device pointers info array.
8603 void generateAllInfo(
8604 MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8605 const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
8606 llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
8607 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8608 "Expect a executable directive");
8609 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
8610 generateAllInfoForClauses(Clauses: CurExecDir->clauses(), CombinedInfo, OMPBuilder,
8611 SkipVarSet);
8612 }
8613
8614 /// Generate all the base pointers, section pointers, sizes, map types, and
8615 /// mappers for the extracted map clauses of user-defined mapper (all included
8616 /// in \a CombinedInfo).
8617 void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
8618 llvm::OpenMPIRBuilder &OMPBuilder) const {
8619 assert(isa<const OMPDeclareMapperDecl *>(CurDir) &&
8620 "Expect a declare mapper directive");
8621 const auto *CurMapperDir = cast<const OMPDeclareMapperDecl *>(Val: CurDir);
8622 generateAllInfoForClauses(Clauses: CurMapperDir->clauses(), CombinedInfo,
8623 OMPBuilder);
8624 }
8625
  /// Emit capture info for lambdas for variables captured by reference.
  ///
  /// For a captured variable \p VD of lambda type (address \p Arg), emits one
  /// PTR_AND_OBJ map entry per by-reference (or pointer) capture field so the
  /// device copy of the closure points at mapped storage. Also records, in
  /// \p LambdaPointers, a mapping from each capture field's address to the
  /// lambda object's address; adjustMemberOfForLambdaCaptures later uses it
  /// to patch the MEMBER_OF fields.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    // Only lambda closures need this treatment.
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(T: VDType),
                   CGF.getContext().getDeclAlign(D: VD));
    LValue VDLVal = CGF.MakeAddrLValue(Addr: VDAddr, T: VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    // Captured 'this': map the stored 'this' pointer as PTR_AND_OBJ.
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: ThisCapture);
      // Field address -> lambda address, for later MEMBER_OF adjustment.
      LambdaPointers.try_emplace(Key: ThisLVal.getPointer(CGF),
                                 Args: VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(Elt: VD);
      CombinedInfo.BasePointers.push_back(Elt: ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy),
                                    DestTy: CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(Val: LC.getCapturedVar());
      // Only by-reference captures, or by-copy captures of pointer type, are
      // emitted here.
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(Val: VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(Base: VDLVal, Field: It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        // By-reference capture: map the referenced object with its full size.
        LValue VarLValVal = CGF.EmitLValueForField(Base: VDLVal, Field: It->second);
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(
                Ty: VD->getType().getCanonicalType().getNonReferenceType()),
            DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // By-copy pointer capture: map through the loaded pointer value with
        // zero size.
        RValue VarRVal = CGF.EmitLoadOfLValue(V: VarLVal, Loc: RD->getLocation());
        LambdaPointers.try_emplace(Key: VarLVal.getPointer(CGF),
                                   Args: VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(Elt: VD);
        CombinedInfo.BasePointers.push_back(Elt: VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
        CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(Elt: VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(Elt: llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0));
      }
      CombinedInfo.Types.push_back(
          Elt: OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(Elt: nullptr);
    }
  }
8702
8703 /// Set correct indices for lambdas captures.
8704 void adjustMemberOfForLambdaCaptures(
8705 llvm::OpenMPIRBuilder &OMPBuilder,
8706 const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
8707 MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
8708 MapFlagsArrayTy &Types) const {
8709 for (unsigned I = 0, E = Types.size(); I < E; ++I) {
8710 // Set correct member_of idx for all implicit lambda captures.
8711 if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
8712 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8713 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
8714 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
8715 continue;
8716 llvm::Value *BasePtr = LambdaPointers.lookup(Val: BasePointers[I]);
8717 assert(BasePtr && "Unable to find base lambda address.");
8718 int TgtIdx = -1;
8719 for (unsigned J = I; J > 0; --J) {
8720 unsigned Idx = J - 1;
8721 if (Pointers[Idx] != BasePtr)
8722 continue;
8723 TgtIdx = Idx;
8724 break;
8725 }
8726 assert(TgtIdx != -1 && "Unable to find parent lambda.");
8727 // All other current entries will be MEMBER_OF the combined entry
8728 // (except for PTR_AND_OBJ entries which do not have a placeholder value
8729 // 0xFFFF in the MEMBER_OF field).
8730 OpenMPOffloadMappingFlags MemberOfFlag =
8731 OMPBuilder.getMemberOfFlag(Position: TgtIdx);
8732 OMPBuilder.setCorrectMemberOfFlag(Flags&: Types[I], MemberOfFlag);
8733 }
8734 }
8735
8736 /// For a capture that has an associated clause, generate the base pointers,
8737 /// section pointers, sizes, map types, and mappers (all included in
8738 /// \a CurCaptureVarInfo).
8739 void generateInfoForCaptureFromClauseInfo(
8740 const CapturedStmt::Capture *Cap, llvm::Value *Arg,
8741 MapCombinedInfoTy &CurCaptureVarInfo, llvm::OpenMPIRBuilder &OMPBuilder,
8742 unsigned OffsetForMemberOfFlag) const {
8743 assert(!Cap->capturesVariableArrayType() &&
8744 "Not expecting to generate map info for a variable array type!");
8745
8746 // We need to know when we generating information for the first component
8747 const ValueDecl *VD = Cap->capturesThis()
8748 ? nullptr
8749 : Cap->getCapturedVar()->getCanonicalDecl();
8750
8751 // for map(to: lambda): skip here, processing it in
8752 // generateDefaultMapInfo
8753 if (LambdasMap.count(Val: VD))
8754 return;
8755
8756 // If this declaration appears in a is_device_ptr clause we just have to
8757 // pass the pointer by value. If it is a reference to a declaration, we just
8758 // pass its value.
8759 if (VD && (DevPointersMap.count(Val: VD) || HasDevAddrsMap.count(Val: VD))) {
8760 CurCaptureVarInfo.Exprs.push_back(Elt: VD);
8761 CurCaptureVarInfo.BasePointers.emplace_back(Args&: Arg);
8762 CurCaptureVarInfo.DevicePtrDecls.emplace_back(Args&: VD);
8763 CurCaptureVarInfo.DevicePointers.emplace_back(Args: DeviceInfoTy::Pointer);
8764 CurCaptureVarInfo.Pointers.push_back(Elt: Arg);
8765 CurCaptureVarInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
8766 V: CGF.getTypeSize(Ty: CGF.getContext().VoidPtrTy), DestTy: CGF.Int64Ty,
8767 /*isSigned=*/true));
8768 CurCaptureVarInfo.Types.push_back(
8769 Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
8770 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
8771 CurCaptureVarInfo.Mappers.push_back(Elt: nullptr);
8772 return;
8773 }
8774
8775 MapDataArrayTy DeclComponentLists;
8776 // For member fields list in is_device_ptr, store it in
8777 // DeclComponentLists for generating components info.
8778 static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
8779 auto It = DevPointersMap.find(Val: VD);
8780 if (It != DevPointersMap.end())
8781 for (const auto &MCL : It->second)
8782 DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_to, Args: Unknown,
8783 /*IsImpicit = */ Args: true, Args: nullptr,
8784 Args: nullptr);
8785 auto I = HasDevAddrsMap.find(Val: VD);
8786 if (I != HasDevAddrsMap.end())
8787 for (const auto &MCL : I->second)
8788 DeclComponentLists.emplace_back(Args: MCL, Args: OMPC_MAP_tofrom, Args: Unknown,
8789 /*IsImpicit = */ Args: true, Args: nullptr,
8790 Args: nullptr);
8791 assert(isa<const OMPExecutableDirective *>(CurDir) &&
8792 "Expect a executable directive");
8793 const auto *CurExecDir = cast<const OMPExecutableDirective *>(Val: CurDir);
8794 bool HasMapBasePtr = false;
8795 bool HasMapArraySec = false;
8796 for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
8797 const auto *EI = C->getVarRefs().begin();
8798 for (const auto L : C->decl_component_lists(VD)) {
8799 const ValueDecl *VDecl, *Mapper;
8800 // The Expression is not correct if the mapping is implicit
8801 const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
8802 OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
8803 std::tie(args&: VDecl, args&: Components, args&: Mapper) = L;
8804 assert(VDecl == VD && "We got information for the wrong declaration??");
8805 assert(!Components.empty() &&
8806 "Not expecting declaration with no component lists.");
8807 if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(Val: E))
8808 HasMapBasePtr = true;
8809 if (VD && E && VD->getType()->isAnyPointerType() &&
8810 (isa<ArraySectionExpr>(Val: E) || isa<ArraySubscriptExpr>(Val: E)))
8811 HasMapArraySec = true;
8812 DeclComponentLists.emplace_back(Args&: Components, Args: C->getMapType(),
8813 Args: C->getMapTypeModifiers(),
8814 Args: C->isImplicit(), Args&: Mapper, Args&: E);
8815 ++EI;
8816 }
8817 }
8818 llvm::stable_sort(Range&: DeclComponentLists, C: [](const MapData &LHS,
8819 const MapData &RHS) {
8820 ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(t: LHS);
8821 OpenMPMapClauseKind MapType = std::get<1>(t: RHS);
8822 bool HasPresent =
8823 llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
8824 bool HasAllocs = MapType == OMPC_MAP_alloc;
8825 MapModifiers = std::get<2>(t: RHS);
8826 MapType = std::get<1>(t: LHS);
8827 bool HasPresentR =
8828 llvm::is_contained(Range&: MapModifiers, Element: clang::OMPC_MAP_MODIFIER_present);
8829 bool HasAllocsR = MapType == OMPC_MAP_alloc;
8830 return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
8831 });
8832
8833 auto GenerateInfoForComponentLists =
8834 [&](ArrayRef<MapData> DeclComponentLists,
8835 bool IsEligibleForTargetParamFlag) {
8836 MapCombinedInfoTy CurInfoForComponentLists;
8837 StructRangeInfoTy PartialStruct;
8838
8839 if (DeclComponentLists.empty())
8840 return;
8841
8842 generateInfoForCaptureFromComponentLists(
8843 VD, DeclComponentLists, CurComponentListInfo&: CurInfoForComponentLists, PartialStruct,
8844 IsListEligibleForTargetParamFlag: IsEligibleForTargetParamFlag,
8845 /*AreBothBasePtrAndPteeMapped=*/HasMapBasePtr && HasMapArraySec);
8846
8847 // If there is an entry in PartialStruct it means we have a
8848 // struct with individual members mapped. Emit an extra combined
8849 // entry.
8850 if (PartialStruct.Base.isValid()) {
8851 CurCaptureVarInfo.append(CurInfo&: PartialStruct.PreliminaryMapData);
8852 emitCombinedEntry(
8853 CombinedInfo&: CurCaptureVarInfo, CurTypes&: CurInfoForComponentLists.Types,
8854 PartialStruct, IsMapThis: Cap->capturesThis(), OMPBuilder, VD: nullptr,
8855 OffsetForMemberOfFlag,
8856 /*NotTargetParams*/ !IsEligibleForTargetParamFlag);
8857 }
8858
8859 // Return if we didn't add any entries.
8860 if (CurInfoForComponentLists.BasePointers.empty())
8861 return;
8862
8863 CurCaptureVarInfo.append(CurInfo&: CurInfoForComponentLists);
8864 };
8865
8866 GenerateInfoForComponentLists(DeclComponentLists,
8867 /*IsEligibleForTargetParamFlag=*/true);
8868 }
8869
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to \a DeclComponentLists for a given capture
  /// \a VD (all included in \a CurComponentListInfo).
  ///
  /// \param VD The captured declaration the component lists belong to.
  /// \param DeclComponentLists All map-component lists collected for \a VD.
  /// \param CurComponentListInfo Output: combined map information.
  /// \param PartialStruct Output: range info when individual struct members
  ///        are mapped (the caller emits an extra combined entry for it).
  /// \param IsListEligibleForTargetParamFlag Whether the first generated
  ///        entry may carry the TARGET_PARAM flag.
  /// \param AreBothBasePtrAndPteeMapped Whether both a pointer and a
  ///        section/subscript of its pointee appear in map clauses.
  void generateInfoForCaptureFromComponentLists(
      const ValueDecl *VD, ArrayRef<MapData> DeclComponentLists,
      MapCombinedInfoTy &CurComponentListInfo, StructRangeInfoTy &PartialStruct,
      bool IsListEligibleForTargetParamFlag,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // Find overlapping elements (including the offset from the base element).
    // Maps each "base" component list to the other lists that overlap with it.
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    // Pairwise comparison: each list L is compared against every later list
    // L1 (the slice starting at Count) by walking both component lists from
    // the innermost component outward (reverse iteration).
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(N: Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(args&: Components1, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper,
                 args&: VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        // Advance while the two lists agree on expression kind and declaration.
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          // `It` points at the first component of the longer list beyond the
          // shared prefix.
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(Val: It->getAssociatedExpression()) ||
              (std::prev(x: It)->getAssociatedDeclaration() &&
               std::prev(x: It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(x: It) != CE && std::next(x: It) != SE))
            continue;
          // The shorter list is the base; record the longer one as overlapping.
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(Elt: SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    // Build the flat field layout of the underlying record so overlapped
    // member lists can be ordered by in-memory field position.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      // Strip pointer/array levels down to the element record type.
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(RD: CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(in_start: RD->field_begin(), in_end: RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Range&: Pair.getSecond(),
          C: [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            // Skip the common prefix of the two component lists.
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            // Otherwise order by the field position: same parent record uses
            // the field index, different parents use the flattened layout.
            const auto *FD1 = cast<FieldDecl>(Val: CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(Val: SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Range&: Layout, P: [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    // Only the first component list generated may carry the TARGET_PARAM
    // flag; AddTargetParamFlag is cleared after each generated list.
    bool AddTargetParamFlag = IsListEligibleForTargetParamFlag;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList: AddTargetParamFlag, IsImplicit,
          /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef, OverlappedElements: OverlappedComponents);
      AddTargetParamFlag = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(args&: Components, args&: MapType, args&: MapModifiers, args&: IsImplicit, args&: Mapper, args&: VarRef) =
          L;
      auto It = OverlappedData.find(Val: &L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, MotionModifiers: {}, Components, CombinedInfo&: CurComponentListInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList: AddTargetParamFlag,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, BaseDecl: VD, MapExpr: VarRef,
            /*OverlappedElements*/ {}, AreBothBasePtrAndPteeMapped);
      AddTargetParamFlag = false;
    }
  }
9043
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  ///
  /// Exactly one entry (base pointer, pointer, size, type, mapper) is
  /// appended to \a CombinedInfo, and it is always flagged as a target
  /// parameter.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      // Captured `this`: map the pointee object tofrom with its full size.
      CombinedInfo.Exprs.push_back(Elt: nullptr);
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      const auto *PtrTy = cast<PointerType>(Val: RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          Elt: CGF.Builder.CreateIntCast(V: CGF.getTypeSize(Ty: PtrTy->getPointeeType()),
                                     DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(Elt: CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
            V: CGF.getTypeSize(Ty: RI.getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int64Ty));
      }
      // For firstprivate captures, take the implicit/explicit flag recorded
      // when the clause was processed.
      auto I = FirstPrivateDecls.find(Val: VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      // Captured by reference: the record field is a reference type; map the
      // referenced object itself.
      const auto *PtrTy = cast<ReferenceType>(Val: RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: ElementType), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(Elt: getMapModifiersForPrivateClauses(Cap: CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(Val: VD);
      CombinedInfo.Exprs.push_back(Elt: VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(Elt: CV);
      CombinedInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CombinedInfo.DevicePointers.push_back(Elt: DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        // Firstprivate pointer: load the pointer value through the reference
        // and use it as the mapped pointer.
        Address PtrAddr = CGF.EmitLoadOfReference(RefLVal: CGF.MakeAddrLValue(
            V: CV, T: ElementType, Alignment: CGF.getContext().getDeclAlign(D: VD),
            Source: AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(Elt: PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(Elt: CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(Elt: nullptr);
  }
9125};
9126} // anonymous namespace
9127
9128// Try to extract the base declaration from a `this->x` expression if possible.
9129static ValueDecl *getDeclFromThisExpr(const Expr *E) {
9130 if (!E)
9131 return nullptr;
9132
9133 if (const auto *OASE = dyn_cast<ArraySectionExpr>(Val: E->IgnoreParenCasts()))
9134 if (const MemberExpr *ME =
9135 dyn_cast<MemberExpr>(Val: OASE->getBase()->IgnoreParenImpCasts()))
9136 return ME->getMemberDecl();
9137 return nullptr;
9138}
9139
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
///
/// Returns the default source-location string when neither a declaration nor
/// an expression is associated with the mapping.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  // Pick the most specific location: for `this->x`-style expressions use the
  // member declaration's location, otherwise the expression's or the mapped
  // declaration's own location.
  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(E: MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  // Use the pretty-printed map expression as the name when available,
  // otherwise the declaration's name.
  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, Helper: nullptr, Policy: P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  // Remap the file path through debug info if available so the runtime string
  // matches the (possibly prefix-remapped) debug locations.
  std::string FileName;
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  if (auto *DbgInfo = CGF.getDebugInfo())
    FileName = DbgInfo->remapDIPath(PLoc.getFilename());
  else
    FileName = PLoc.getFilename();
  // NOTE: the file name goes in the "function name" slot and the expression
  // name in the "file name" slot of the location string on purpose.
  return OMPBuilder.getOrCreateSrcLocStr(FunctionName: FileName, FileName: ExprName, Line: PLoc.getLine(),
                                         Column: PLoc.getColumn(), SrcLocStrSize);
}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
///
/// Delegates the actual array emission to the OpenMPIRBuilder, providing
/// callbacks for device-address bookkeeping and user-defined mappers.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Allocas go to the function's alloca insertion point; everything else is
  // emitted at the current insertion point.
  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  // Record the device address materialized for each use_device_ptr/addr
  // declaration so later codegen can find it.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Lazily emit (or fetch) the user-defined mapper function for entry I, if
  // one was attached to the map information.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  cantFail(Err: OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, RTArgs&: Info.RTArgs, CombinedInfo, CustomMapperCB,
      IsNonContiguous, ForEndCall, DeviceAddrCB));
}
9212
/// Check for inner distribute directive.
///
/// Looks through the captured statement of \p D for a nested directive that
/// carries the distribute loop (directly, or one level below a nested
/// 'teams'). Returns nullptr when no such directive exists.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        // Look one level deeper: 'target' -> 'teams' -> 'distribute...'.
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(Val: ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      // These target forms never contain a separate distribute directive.
      return nullptr;
    // All remaining directive kinds are not valid here; the exhaustive list
    // is kept so new directive kinds trigger a -Wswitch warning.
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
9324
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  // A mapper function is emitted at most once per declaration.
  if (UDMMap.count(Val: D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  auto *MapperVarDecl =
      cast<VarDecl>(Val: cast<DeclRefExpr>(Val: D->getMapperVarRef())->getDecl());
  CharUnits ElementSize = C.getTypeSizeInChars(T: Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(T: Ty);

  CodeGenFunction MapperCGF(CGM);
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  // Callback invoked by the IRBuilder inside the element loop: privatize the
  // mapper variable to the current array element and produce the map info
  // described by the declare-mapper's map clauses.
  auto PrivatizeAndGenMapInfoCB =
      [&](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, llvm::Value *PtrPHI,
          llvm::Value *BeginArg) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    MapperCGF.Builder.restoreIP(IP: CodeGenIP);

    // Privatize the declared variable of mapper to be the current array
    // element.
    Address PtrCurrent(
        PtrPHI, ElemTy,
        Address(BeginArg, MapperCGF.VoidPtrTy, CGM.getPointerAlign())
            .getAlignment()
            .alignmentOfArrayElement(elementSize: ElementSize));
    CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
    Scope.addPrivate(LocalVD: MapperVarDecl, Addr: PtrCurrent);
    (void)Scope.Privatize();

    // Get map clause information.
    MappableExprsHandler MEHandler(*D, MapperCGF);
    MEHandler.generateAllInfoForMapper(CombinedInfo, OMPBuilder);

    // With debug info enabled, also emit the name strings for each entry.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF&: MapperCGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };

  // Callback that resolves a nested user-defined mapper for entry I, if any
  // (this may recursively emit other mapper functions).
  auto CustomMapperCB = [&](unsigned I) {
    llvm::Function *MapperFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      // Call the corresponding mapper function.
      MapperFunc = getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
    }
    return MapperFunc;
  };

  // Build the ".omp_mapper.<mangled type>.<mapper name>" function name.
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(T: Ty, Out);
  std::string Name = getName(Parts: {"omp_mapper", TyStr, D->getName()});

  llvm::Function *NewFn = cantFail(ValOrErr: OMPBuilder.emitUserDefinedMapper(
      PrivAndGenMapInfoCB: PrivatizeAndGenMapInfoCB, ElemTy, FuncName: Name, CustomMapperCB));
  // Cache the function and, when emitted inside another function, remember
  // the association for cleanup purposes.
  UDMMap.try_emplace(Key: D, Args&: NewFn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(Elt: D);
}
9425
9426llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
9427 const OMPDeclareMapperDecl *D) {
9428 auto I = UDMMap.find(Val: D);
9429 if (I != UDMMap.end())
9430 return I->second;
9431 emitUserDefinedMapper(D);
9432 return UDMMap.lookup(Val: D);
9433}
9434
9435llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
9436 CodeGenFunction &CGF, const OMPExecutableDirective &D,
9437 llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
9438 const OMPLoopDirective &D)>
9439 SizeEmitter) {
9440 OpenMPDirectiveKind Kind = D.getDirectiveKind();
9441 const OMPExecutableDirective *TD = &D;
9442 // Get nested teams distribute kind directive, if any. For now, treat
9443 // 'target_teams_loop' as if it's really a target_teams_distribute.
9444 if ((!isOpenMPDistributeDirective(DKind: Kind) || !isOpenMPTeamsDirective(DKind: Kind)) &&
9445 Kind != OMPD_target_teams_loop)
9446 TD = getNestedDistributeDirective(Ctx&: CGM.getContext(), D);
9447 if (!TD)
9448 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
9449
9450 const auto *LD = cast<OMPLoopDirective>(Val: TD);
9451 if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
9452 return NumIterations;
9453 return llvm::ConstantInt::get(Ty: CGF.Int64Ty, V: 0);
9454}
9455
9456static void
9457emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9458 const OMPExecutableDirective &D,
9459 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9460 bool RequiresOuterTask, const CapturedStmt &CS,
9461 bool OffloadingMandatory, CodeGenFunction &CGF) {
9462 if (OffloadingMandatory) {
9463 CGF.Builder.CreateUnreachable();
9464 } else {
9465 if (RequiresOuterTask) {
9466 CapturedVars.clear();
9467 CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
9468 }
9469 OMPRuntime->emitOutlinedFunctionCall(CGF, Loc: D.getBeginLoc(), OutlinedFn,
9470 Args: CapturedVars);
9471 }
9472}
9473
9474static llvm::Value *emitDeviceID(
9475 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
9476 CodeGenFunction &CGF) {
9477 // Emit device ID if any.
9478 llvm::Value *DeviceID;
9479 if (Device.getPointer()) {
9480 assert((Device.getInt() == OMPC_DEVICE_unknown ||
9481 Device.getInt() == OMPC_DEVICE_device_num) &&
9482 "Expected device_num modifier.");
9483 llvm::Value *DevVal = CGF.EmitScalarExpr(E: Device.getPointer());
9484 DeviceID =
9485 CGF.Builder.CreateIntCast(V: DevVal, DestTy: CGF.Int64Ty, /*isSigned=*/true);
9486 } else {
9487 DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
9488 }
9489 return DeviceID;
9490}
9491
9492static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
9493 CodeGenFunction &CGF) {
9494 llvm::Value *DynCGroupMem = CGF.Builder.getInt32(C: 0);
9495
9496 if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
9497 CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
9498 llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
9499 E: DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
9500 DynCGroupMem = CGF.Builder.CreateIntCast(V: DynCGroupMemVal, DestTy: CGF.Int32Ty,
9501 /*isSigned=*/false);
9502 }
9503 return DynCGroupMem;
9504}
/// Generate map information for every capture of the target region's
/// captured statement, appending it to \p CombinedInfo.
///
/// Each handled declaration is recorded in \p MappedVarSet so the caller can
/// skip it when mapping the remaining (non-captured) clause list items.
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  // Captures, record fields, and captured values are parallel sequences;
  // iterate them in lock-step.
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(Elt: nullptr);
      CurInfo.BasePointers.push_back(Elt: *CV);
      CurInfo.DevicePtrDecls.push_back(Elt: nullptr);
      CurInfo.DevicePointers.push_back(
          Elt: MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(Elt: *CV);
      CurInfo.Sizes.push_back(Elt: CGF.Builder.CreateIntCast(
          V: CGF.getTypeSize(Ty: RI->getType()), DestTy: CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(Elt: OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(Elt: nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCaptureFromClauseInfo(
          Cap: CI, Arg: *CV, CurCaptureVarInfo&: CurInfo, OMPBuilder,
          /*OffsetForMemberOfFlag=*/CombinedInfo.BasePointers.size());

      // The captured `this` is recorded as nullptr in the handled set.
      if (!CI->capturesThis())
        MappedVarSet.insert(V: CI->getCapturedVar());
      else
        MappedVarSet.insert(V: nullptr);

      // No clause-based info was produced: fall back to the default mapping.
      if (CurInfo.BasePointers.empty())
        MEHandler.generateDefaultMapInfo(CI: *CI, RI: **RI, CV: *CV, CombinedInfo&: CurInfo);

      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(VD: CI->getCapturedVar(), Arg: *CV,
                                                CombinedInfo&: CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert(!CurInfo.BasePointers.empty() &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, BasePointers&: CombinedInfo.BasePointers,
      Pointers&: CombinedInfo.Pointers, Types&: CombinedInfo.Types);
}
/// Generate map information for the map-clause list items of the current
/// directive that are not in \p SkippedVarSet (i.e. were not already handled
/// as captures), appending it to \p CombinedInfo. With debug info enabled,
/// also emits the per-entry name strings.
static void
genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
           MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
           llvm::OpenMPIRBuilder &OMPBuilder,
           const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
               llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {

  CodeGenModule &CGM = CGF.CGM;
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkipVarSet: SkippedVarSet);

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
  };
  // Name strings are only needed when debug info is being emitted.
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
    llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                    F: FillInfoMap);
  }
}
9596
9597static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
9598 const CapturedStmt &CS,
9599 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9600 llvm::OpenMPIRBuilder &OMPBuilder,
9601 MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
9602 // Get mappable expression information.
9603 MappableExprsHandler MEHandler(D, CGF);
9604 llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;
9605
9606 genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
9607 MappedVarSet, CombinedInfo);
9608 genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, SkippedVarSet: MappedVarSet);
9609}
9610
9611template <typename ClauseTy>
9612static void
9613emitClauseForBareTargetDirective(CodeGenFunction &CGF,
9614 const OMPExecutableDirective &D,
9615 llvm::SmallVectorImpl<llvm::Value *> &Values) {
9616 const auto *C = D.getSingleClause<ClauseTy>();
9617 assert(!C->varlist_empty() &&
9618 "ompx_bare requires explicit num_teams and thread_limit");
9619 CodeGenFunction::RunCleanupsScope Scope(CGF);
9620 for (auto *E : C->varlist()) {
9621 llvm::Value *V = CGF.EmitScalarExpr(E);
9622 Values.push_back(
9623 Elt: CGF.Builder.CreateIntCast(V, DestTy: CGF.Int32Ty, /*isSigned=*/true));
9624 }
9625}
9626
/// Emit the kernel-launch path of a target call: build the offloading
/// argument arrays from the captured variables, then emit a call through the
/// OpenMPIRBuilder kernel-launch helper with a host-fallback callback.
/// \p InputInfo / \p MapTypesArray / \p MapNamesArray are out-parameters that
/// the (possibly deferred) launch code and task-based codegen read later.
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  // Publish the emitted arrays through the out-parameters so they outlive this
  // function and are visible to ThenGen below (which may run deferred, inside
  // a task region).
  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  // Code generator for the actual launch; references (not copies) of the
  // arrays above are captured because the arrays may be rewritten by
  // task-based codegen before this runs.
  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    // Host-fallback callback invoked by the IR builder when the device launch
    // fails (or offloading is unavailable).
    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    // 'ompx_bare' kernels take their grid sizes straight from the explicit
    // num_teams/thread_limit clauses; regular kernels derive them.
    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             NumThreads);
    } else {
      NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
        cantFail(OMPRuntime->getOMPBuilder().emitKernelLaunch(
            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP));
    CGF.Builder.restoreIP(AfterIP);
  };

  // With depend/nowait/... clauses the launch runs inside an outer task;
  // otherwise it is emitted inline right here.
  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
9737
9738static void
9739emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
9740 const OMPExecutableDirective &D,
9741 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
9742 bool RequiresOuterTask, const CapturedStmt &CS,
9743 bool OffloadingMandatory, CodeGenFunction &CGF) {
9744
9745 // Notify that the host version must be executed.
9746 auto &&ElseGen =
9747 [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
9748 OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
9749 emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
9750 RequiresOuterTask, CS, OffloadingMandatory, CGF);
9751 };
9752
9753 if (RequiresOuterTask) {
9754 CodeGenFunction::OMPTargetDataInfo InputInfo;
9755 CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ElseGen, InputInfo);
9756 } else {
9757 OMPRuntime->emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ElseGen);
9758 }
9759}
9760
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  // Offloading can only be "mandatory" on the host side; on the device there
  // is nothing to fall back to in the first place.
  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  // depend/nowait/in_reduction (and, from OpenMP 5.1, thread_limit on
  // task-generating target directives) force the call to be wrapped in an
  // outer task.
  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(DKind: D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(RegionKind: OMPD_target);
  // Materialize the captured variables up front, in an inlined region, so
  // both the offload and host-fallback paths below can use them.
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(S: CS, CapturedVars);
  };
  emitInlinedDirective(CGF, InnerKind: OMPD_unknown, CodeGen: ArgsCodegen);

  // Filled in by emitTargetCallKernelLaunch; captured by reference by the
  // lambdas below because they may execute deferred.
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  // "then" branch: attempt the device kernel launch.
  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(OMPRuntime: this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  // "else" branch: run the host version of the region.
  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(OMPRuntime: this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user do not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen, ElseGen: TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
9829
/// Recursively scan \p S for target execution directives and emit a device
/// function for each one found, using \p ParentName for kernel-name mangling.
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(Val: S) &&
      isOpenMPTargetExecutionDirective(
          DKind: cast<OMPExecutableDirective>(Val: S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(Val: S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, BeginLoc: E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    // Dispatch to the directive-specific device-function emitter.
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   S: cast<OMPTargetDirective>(Val: E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelDirective>(Val: E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsDistributeSimdDirective>(Val: E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForDirective>(Val: E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          S: cast<OMPTargetTeamsDistributeParallelForDirective>(Val: E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              S: cast<OMPTargetTeamsDistributeParallelForSimdDirective>(Val: E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetTeamsGenericLoopDirective>(Val: E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, S: cast<OMPTargetParallelGenericLoopDirective>(Val: E));
      break;
    // Non-target directives can never satisfy RequiresDeviceCodegen; listing
    // them keeps the switch exhaustive so new kinds trigger a diagnostic.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  // A non-target executable directive: recurse into the raw associated
  // statement (if any) to find nested target regions.
  if (const auto *E = dyn_cast<OMPExecutableDirective>(Val: S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(S: E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(Val: S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(S: II, ParentName);
}
9985
9986static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
9987 std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
9988 OMPDeclareTargetDeclAttr::getDeviceType(VD);
9989 if (!DevTy)
9990 return false;
9991 // Do not emit device_type(nohost) functions for the host.
9992 if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
9993 return true;
9994 // Do not emit device_type(host) functions for the device.
9995 if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
9996 return true;
9997 return false;
9998}
9999
10000bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
10001 // If emitting code for the host, we do not process FD here. Instead we do
10002 // the normal code generation.
10003 if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
10004 if (const auto *FD = dyn_cast<FunctionDecl>(Val: GD.getDecl()))
10005 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
10006 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
10007 return true;
10008 return false;
10009 }
10010
10011 const ValueDecl *VD = cast<ValueDecl>(Val: GD.getDecl());
10012 // Try to detect target regions in the function.
10013 if (const auto *FD = dyn_cast<FunctionDecl>(Val: VD)) {
10014 StringRef Name = CGM.getMangledName(GD);
10015 scanForTargetRegionsFunctions(S: FD->getBody(), ParentName: Name);
10016 if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: FD),
10017 IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
10018 return true;
10019 }
10020
10021 // Do not to emit function if it is not marked as declare target.
10022 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
10023 AlreadyEmittedTargetDecls.count(V: VD) == 0;
10024}
10025
/// Decide whether the global variable \p GD should be skipped (or deferred)
/// by the current compilation side. Returns true to suppress emission now.
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  // Skip entirely if the declaration's device_type excludes this side.
  if (isAssumedToBeNotEmitted(VD: cast<ValueDecl>(Val: GD.getDecl()),
                              IsDevice: CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  // On the host, emit normally.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(Val: GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(S: Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GD: GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(S: Dtor->getBody(), ParentName);
    }
  }

  // Do not to emit variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          VD: cast<VarDecl>(Val: GD.getDecl()));
  // 'link' variables — and 'to'/'enter' variables under unified shared
  // memory — are deferred; emitDeferredTargetDecls handles them later.
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(V: cast<VarDecl>(Val: GD.getDecl()));
    return true;
  }
  return false;
}
10064
/// Register \p VD (already emitted at \p Addr) with the OpenMPIRBuilder so an
/// offloading entry and any needed reference globals are generated for it.
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  // Nothing to register when no offloading targets exist and we are on the
  // host.
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(GD: VD);
      EmittedNonTargetVariables.try_emplace(Key: VarName, Args&: Addr);
    }
    return;
  }

  // Lazily computed callbacks: the IR builder only needs the address or
  // linkage for some entry kinds.
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(GD: VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      CaptureClause: convertCaptureClause(VD), DeviceClause: convertDeviceClause(VD),
      IsDeclaration: VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      IsExternallyVisible: VD->isExternallyVisible(),
      EntryInfo: getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  BeginLoc: VD->getCanonicalDecl()->getBeginLoc()),
      MangledName: CGM.getMangledName(GD: VD), GeneratedRefs, OpenMPSIMD: CGM.getLangOpts().OpenMPSimd,
      TargetTriple: CGM.getLangOpts().OMPTargetTriples, GlobalInitializer: AddrOfGlobal, VariableLinkage: LinkageForVariable,
      LlvmPtrTy: CGM.getTypes().ConvertTypeForMem(
          T: CGM.getContext().getPointerType(T: VD->getType())),
      Addr);

  // Keep any generated reference globals alive through optimization.
  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(GV: ref);
}
10111
10112bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
10113 if (isa<FunctionDecl>(Val: GD.getDecl()) ||
10114 isa<OMPDeclareReductionDecl>(Val: GD.getDecl()))
10115 return emitTargetFunctions(GD);
10116
10117 return emitTargetGlobalVariable(GD);
10118}
10119
10120void CGOpenMPRuntime::emitDeferredTargetDecls() const {
10121 for (const VarDecl *VD : DeferredGlobalVariables) {
10122 std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
10123 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
10124 if (!Res)
10125 continue;
10126 if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10127 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10128 !HasRequiresUnifiedSharedMemory) {
10129 CGM.EmitGlobal(D: VD);
10130 } else {
10131 assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
10132 ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
10133 *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
10134 HasRequiresUnifiedSharedMemory)) &&
10135 "Expected link clause or to clause with unified memory.");
10136 (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
10137 }
10138 }
10139}
10140
// Default no-op beyond validating that D is a target execution directive.
// NOTE(review): presumably device-specific runtimes override this to fix up
// lambda-captured data — confirm against subclasses (not visible here).
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
10146
10147void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
10148 for (const OMPClause *Clause : D->clauselists()) {
10149 if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
10150 HasRequiresUnifiedSharedMemory = true;
10151 OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
10152 } else if (const auto *AC =
10153 dyn_cast<OMPAtomicDefaultMemOrderClause>(Val: Clause)) {
10154 switch (AC->getAtomicDefaultMemOrderKind()) {
10155 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
10156 RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
10157 break;
10158 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
10159 RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
10160 break;
10161 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
10162 RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
10163 break;
10164 case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
10165 break;
10166 }
10167 }
10168 }
10169}
10170
/// Returns the atomic ordering selected by a 'requires
/// atomic_default_mem_order' clause (see processRequiresDirective).
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
10174
10175bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
10176 LangAS &AS) {
10177 if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
10178 return false;
10179 const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
10180 switch(A->getAllocatorType()) {
10181 case OMPAllocateDeclAttr::OMPNullMemAlloc:
10182 case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
10183 // Not supported, fallback to the default mem space.
10184 case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
10185 case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
10186 case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
10187 case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
10188 case OMPAllocateDeclAttr::OMPThreadMemAlloc:
10189 case OMPAllocateDeclAttr::OMPConstMemAlloc:
10190 case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
10191 AS = LangAS::Default;
10192 return true;
10193 case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
10194 llvm_unreachable("Expected predefined allocator for the variables with the "
10195 "static storage.");
10196 }
10197 return false;
10198}
10199
/// Returns true if a 'requires unified_shared_memory' clause was seen (set by
/// processRequiresDirective).
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
10203
10204CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
10205 CodeGenModule &CGM)
10206 : CGM(CGM) {
10207 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
10208 SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
10209 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
10210 }
10211}
10212
10213CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
10214 if (CGM.getLangOpts().OpenMPIsTargetDevice)
10215 CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
10216}
10217
/// Record that the function \p GD is needed in device code. Returns true when
/// the decl is (considered) already emitted, false when the caller should
/// emit it.
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  // On the host, or while auto-marking is disabled (see
  // DisableAutoDeclareTargetRAII), report "already emitted" so nothing extra
  // happens.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(Val: GD.getDecl());
  // Do not to emit function if it is marked as declare target as it was already
  // emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(V: D) == 0) {
      // If the module already holds this mangled name, it only counts as
      // emitted when that llvm::Function has a definition (not a mere decl).
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              Val: CGM.GetGlobalValue(Ref: CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  // Non-declare-target decl: insert() succeeds exactly once, so the first
  // call returns false ("emit it") and later calls return true.
  return !AlreadyEmittedTargetDecls.insert(V: D).second;
}
10237
10238void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
10239 const OMPExecutableDirective &D,
10240 SourceLocation Loc,
10241 llvm::Function *OutlinedFn,
10242 ArrayRef<llvm::Value *> CapturedVars) {
10243 if (!CGF.HaveInsertPoint())
10244 return;
10245
10246 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10247 CodeGenFunction::RunCleanupsScope Scope(CGF);
10248
10249 // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
10250 llvm::Value *Args[] = {
10251 RTLoc,
10252 CGF.Builder.getInt32(C: CapturedVars.size()), // Number of captured vars
10253 OutlinedFn};
10254 llvm::SmallVector<llvm::Value *, 16> RealArgs;
10255 RealArgs.append(in_start: std::begin(arr&: Args), in_end: std::end(arr&: Args));
10256 RealArgs.append(in_start: CapturedVars.begin(), in_end: CapturedVars.end());
10257
10258 llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
10259 M&: CGM.getModule(), FnID: OMPRTL___kmpc_fork_teams);
10260 CGF.EmitRuntimeCall(callee: RTLFn, args: RealArgs);
10261}
10262
10263void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
10264 const Expr *NumTeams,
10265 const Expr *ThreadLimit,
10266 SourceLocation Loc) {
10267 if (!CGF.HaveInsertPoint())
10268 return;
10269
10270 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10271
10272 llvm::Value *NumTeamsVal =
10273 NumTeams
10274 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: NumTeams),
10275 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10276 : CGF.Builder.getInt32(C: 0);
10277
10278 llvm::Value *ThreadLimitVal =
10279 ThreadLimit
10280 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
10281 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10282 : CGF.Builder.getInt32(C: 0);
10283
10284 // Build call __kmpc_push_num_teamss(&loc, global_tid, num_teams, thread_limit)
10285 llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
10286 ThreadLimitVal};
10287 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
10288 M&: CGM.getModule(), FnID: OMPRTL___kmpc_push_num_teams),
10289 args: PushNumTeamsArgs);
10290}
10291
10292void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
10293 const Expr *ThreadLimit,
10294 SourceLocation Loc) {
10295 llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
10296 llvm::Value *ThreadLimitVal =
10297 ThreadLimit
10298 ? CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: ThreadLimit),
10299 DestTy: CGF.CGM.Int32Ty, /* isSigned = */ true)
10300 : CGF.Builder.getInt32(C: 0);
10301
10302 // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
10303 llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
10304 ThreadLimitVal};
10305 CGF.EmitRuntimeCall(callee: OMPBuilder.getOrCreateRuntimeFunction(
10306 M&: CGM.getModule(), FnID: OMPRTL___kmpc_set_thread_limit),
10307 args: ThreadLimitArgs);
10308}
10309
/// Emit a 'target data' region through OpenMPIRBuilder::createTargetData,
/// wiring the clause-derived map info, the region body, device-address
/// rewrites and user-defined mappers in as callbacks.
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(E: IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                         DestTy: CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  // CombinedInfo is populated lazily by GenMapInfoCB and then consumed by the
  // DeviceAddrCB/CustomMapperCB callbacks below.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    // Mapping-name strings are only materialized under debug info.
    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExprs&: MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(N: CombinedInfo.Exprs.size());
      llvm::transform(Range&: CombinedInfo.Exprs, d_first: CombinedInfo.Names.begin(),
                      F: FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  // Emit the region body; which of the three phases actually runs the body
  // depends on whether use_device_ptr/addr captures are present.
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(IP: CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  // Record the device-side replacement value for each use_device_ptr/addr
  // declaration so the privatized body can use it.
  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(Key: DevVD, Args&: NewDecl);
    }
  };

  // Supply the user-defined mapper function for entry I, if it has one.
  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Function *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          D: cast<OMPDeclareMapperDecl>(Val: CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
      cantFail(ValOrErr: OMPBuilder.createTargetData(
          Loc: OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCond: IfCondVal, Info, GenMapInfoCB,
          CustomMapperCB,
          /*MapperFunc=*/nullptr, BodyGenCB: BodyCB, DeviceAddrCB, SrcLocInfo: RTLoc));
  CGF.Builder.restoreIP(IP: AfterIP);
}
10413
/// Emit the runtime call for the standalone data-movement directives
/// 'target enter data', 'target exit data' and 'target update'. Builds the
/// offloading arrays, then calls the matching __tgt_target_data_* entry
/// point (possibly wrapped in an outer task when depend/nowait is present).
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  // Nothing to do if the current insertion block is already terminated.
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  // InputInfo and the two arrays are filled in by TargetThenGen below and
  // read by ThenGen (captured by reference in both closures).
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(V: CGF.EmitScalarExpr(E: Device),
                                           DestTy: CGF.Int64Ty, /*isSigned=*/true);
    } else {
      // No device clause: let the runtime pick the default device.
      DeviceID = CGF.Builder.getInt64(C: OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(C: InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc: D.getBeginLoc());

    // Argument list shared by all __tgt_target_data_* mapper entry points.
    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    // All remaining directive kinds are rejected by the assertion at the top
    // of this function and must never reach here.
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
      break;
    }
    // The nowait entry points take four extra (here: null/zero) dependence
    // arguments.
    if (HasNowait) {
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.Int32Ty));
      OffloadingArgs.push_back(Elt: llvm::Constant::getNullValue(Ty: CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID: RTLFn),
        args: OffloadingArgs);
  };

  // Outer closure: builds the offloading arrays, fills InputInfo /
  // MapTypesArray / MapNamesArray, then runs ThenGen either inside a task
  // (depend/nowait) or inlined.
  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    // depend or nowait clauses require the call to run from a task.
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(S: D, BodyGen: ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, InnerKind: D.getDirectiveKind(), CodeGen: ThenGen);
  };

  // Honor the 'if' clause: with a false condition nothing is emitted.
  if (IfCond) {
    emitIfClause(CGF, Cond: IfCond, ThenGen: TargetThenGen,
                 ElseGen: [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
10591
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,     // linear clause, by value
  LinearRef,  // linear(ref(...)) clause
  LinearUVal, // linear(uval(...)) clause
  LinearVal,  // linear(val(...)) clause
  Uniform,    // uniform clause
  Vector,     // default: varies across SIMD lanes
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  // Classification of the parameter; defaults to Vector when no clause names
  // the parameter.
  ParamKindTy Kind = Vector;
  // Linear step, or (when HasVarStride) the position of the stride parameter.
  llvm::APSInt StrideOrArg;
  // Alignment from the aligned clause, or the default OpenMP SIMD alignment.
  llvm::APSInt Alignment;
  // True when the linear step is given by another (uniform) parameter.
  bool HasVarStride = false;
};
} // namespace
10610
10611static unsigned evaluateCDTSize(const FunctionDecl *FD,
10612 ArrayRef<ParamAttrTy> ParamAttrs) {
10613 // Every vector variant of a SIMD-enabled function has a vector length (VLEN).
10614 // If OpenMP clause "simdlen" is used, the VLEN is the value of the argument
10615 // of that clause. The VLEN value must be power of 2.
10616 // In other case the notion of the function`s "characteristic data type" (CDT)
10617 // is used to compute the vector length.
10618 // CDT is defined in the following order:
10619 // a) For non-void function, the CDT is the return type.
10620 // b) If the function has any non-uniform, non-linear parameters, then the
10621 // CDT is the type of the first such parameter.
10622 // c) If the CDT determined by a) or b) above is struct, union, or class
10623 // type which is pass-by-value (except for the type that maps to the
10624 // built-in complex data type), the characteristic data type is int.
10625 // d) If none of the above three cases is applicable, the CDT is int.
10626 // The VLEN is then determined based on the CDT and the size of vector
10627 // register of that ISA for which current vector version is generated. The
10628 // VLEN is computed using the formula below:
10629 // VLEN = sizeof(vector_register) / sizeof(CDT),
10630 // where vector register size specified in section 3.2.1 Registers and the
10631 // Stack Frame of original AMD64 ABI document.
10632 QualType RetType = FD->getReturnType();
10633 if (RetType.isNull())
10634 return 0;
10635 ASTContext &C = FD->getASTContext();
10636 QualType CDT;
10637 if (!RetType.isNull() && !RetType->isVoidType()) {
10638 CDT = RetType;
10639 } else {
10640 unsigned Offset = 0;
10641 if (const auto *MD = dyn_cast<CXXMethodDecl>(Val: FD)) {
10642 if (ParamAttrs[Offset].Kind == Vector)
10643 CDT = C.getPointerType(T: C.getRecordType(Decl: MD->getParent()));
10644 ++Offset;
10645 }
10646 if (CDT.isNull()) {
10647 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10648 if (ParamAttrs[I + Offset].Kind == Vector) {
10649 CDT = FD->getParamDecl(i: I)->getType();
10650 break;
10651 }
10652 }
10653 }
10654 }
10655 if (CDT.isNull())
10656 CDT = C.IntTy;
10657 CDT = CDT->getCanonicalTypeUnqualified();
10658 if (CDT->isRecordType() || CDT->isUnionType())
10659 CDT = C.IntTy;
10660 return C.getTypeSize(T: CDT);
10661}
10662
10663/// Mangle the parameter part of the vector function name according to
10664/// their OpenMP classification. The mangling function is defined in
10665/// section 4.5 of the AAVFABI(2021Q1).
10666static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
10667 SmallString<256> Buffer;
10668 llvm::raw_svector_ostream Out(Buffer);
10669 for (const auto &ParamAttr : ParamAttrs) {
10670 switch (ParamAttr.Kind) {
10671 case Linear:
10672 Out << 'l';
10673 break;
10674 case LinearRef:
10675 Out << 'R';
10676 break;
10677 case LinearUVal:
10678 Out << 'U';
10679 break;
10680 case LinearVal:
10681 Out << 'L';
10682 break;
10683 case Uniform:
10684 Out << 'u';
10685 break;
10686 case Vector:
10687 Out << 'v';
10688 break;
10689 }
10690 if (ParamAttr.HasVarStride)
10691 Out << "s" << ParamAttr.StrideOrArg;
10692 else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
10693 ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
10694 // Don't print the step value if it is not present or if it is
10695 // equal to 1.
10696 if (ParamAttr.StrideOrArg < 0)
10697 Out << 'n' << -ParamAttr.StrideOrArg;
10698 else if (ParamAttr.StrideOrArg != 1)
10699 Out << ParamAttr.StrideOrArg;
10700 }
10701
10702 if (!!ParamAttr.Alignment)
10703 Out << 'a' << ParamAttr.Alignment;
10704 }
10705
10706 return std::string(Out.str());
10707}
10708
10709static void
10710emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
10711 const llvm::APSInt &VLENVal,
10712 ArrayRef<ParamAttrTy> ParamAttrs,
10713 OMPDeclareSimdDeclAttr::BranchStateTy State) {
10714 struct ISADataTy {
10715 char ISA;
10716 unsigned VecRegSize;
10717 };
10718 ISADataTy ISAData[] = {
10719 {
10720 .ISA: 'b', .VecRegSize: 128
10721 }, // SSE
10722 {
10723 .ISA: 'c', .VecRegSize: 256
10724 }, // AVX
10725 {
10726 .ISA: 'd', .VecRegSize: 256
10727 }, // AVX2
10728 {
10729 .ISA: 'e', .VecRegSize: 512
10730 }, // AVX512
10731 };
10732 llvm::SmallVector<char, 2> Masked;
10733 switch (State) {
10734 case OMPDeclareSimdDeclAttr::BS_Undefined:
10735 Masked.push_back(Elt: 'N');
10736 Masked.push_back(Elt: 'M');
10737 break;
10738 case OMPDeclareSimdDeclAttr::BS_Notinbranch:
10739 Masked.push_back(Elt: 'N');
10740 break;
10741 case OMPDeclareSimdDeclAttr::BS_Inbranch:
10742 Masked.push_back(Elt: 'M');
10743 break;
10744 }
10745 for (char Mask : Masked) {
10746 for (const ISADataTy &Data : ISAData) {
10747 SmallString<256> Buffer;
10748 llvm::raw_svector_ostream Out(Buffer);
10749 Out << "_ZGV" << Data.ISA << Mask;
10750 if (!VLENVal) {
10751 unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
10752 assert(NumElts && "Non-zero simdlen/cdtsize expected");
10753 Out << llvm::APSInt::getUnsigned(X: Data.VecRegSize / NumElts);
10754 } else {
10755 Out << VLENVal;
10756 }
10757 Out << mangleVectorParameters(ParamAttrs);
10758 Out << '_' << Fn->getName();
10759 Fn->addFnAttr(Kind: Out.str());
10760 }
10761 }
10762}
10763
// These are the functions needed to mangle the names of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
10769
10770/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
10771static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
10772 QT = QT.getCanonicalType();
10773
10774 if (QT->isVoidType())
10775 return false;
10776
10777 if (Kind == ParamKindTy::Uniform)
10778 return false;
10779
10780 if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
10781 return false;
10782
10783 if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
10784 !QT->isReferenceType())
10785 return false;
10786
10787 return true;
10788}
10789
10790/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
10791static bool getAArch64PBV(QualType QT, ASTContext &C) {
10792 QT = QT.getCanonicalType();
10793 unsigned Size = C.getTypeSize(T: QT);
10794
10795 // Only scalars and complex within 16 bytes wide set PVB to true.
10796 if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
10797 return false;
10798
10799 if (QT->isFloatingType())
10800 return true;
10801
10802 if (QT->isIntegerType())
10803 return true;
10804
10805 if (QT->isPointerType())
10806 return true;
10807
10808 // TODO: Add support for complex types (section 3.1.2, item 2).
10809
10810 return false;
10811}
10812
10813/// Computes the lane size (LS) of a return type or of an input parameter,
10814/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
10815/// TODO: Add support for references, section 3.2.1, item 1.
10816static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
10817 if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
10818 QualType PTy = QT.getCanonicalType()->getPointeeType();
10819 if (getAArch64PBV(QT: PTy, C))
10820 return C.getTypeSize(T: PTy);
10821 }
10822 if (getAArch64PBV(QT, C))
10823 return C.getTypeSize(T: QT);
10824
10825 return C.getTypeSize(T: C.getUIntPtrType());
10826}
10827
10828// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
10829// signature of the scalar function, as defined in 3.2.2 of the
10830// AAVFABI.
10831static std::tuple<unsigned, unsigned, bool>
10832getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
10833 QualType RetType = FD->getReturnType().getCanonicalType();
10834
10835 ASTContext &C = FD->getASTContext();
10836
10837 bool OutputBecomesInput = false;
10838
10839 llvm::SmallVector<unsigned, 8> Sizes;
10840 if (!RetType->isVoidType()) {
10841 Sizes.push_back(Elt: getAArch64LS(QT: RetType, Kind: ParamKindTy::Vector, C));
10842 if (!getAArch64PBV(QT: RetType, C) && getAArch64MTV(QT: RetType, Kind: {}))
10843 OutputBecomesInput = true;
10844 }
10845 for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
10846 QualType QT = FD->getParamDecl(i: I)->getType().getCanonicalType();
10847 Sizes.push_back(Elt: getAArch64LS(QT, Kind: ParamAttrs[I].Kind, C));
10848 }
10849
10850 assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
10851 // The LS of a function parameter / return value can only be a power
10852 // of 2, starting from 8 bits, up to 128.
10853 assert(llvm::all_of(Sizes,
10854 [](unsigned Size) {
10855 return Size == 8 || Size == 16 || Size == 32 ||
10856 Size == 64 || Size == 128;
10857 }) &&
10858 "Invalid size");
10859
10860 return std::make_tuple(args&: *llvm::min_element(Range&: Sizes), args&: *llvm::max_element(Range&: Sizes),
10861 args&: OutputBecomesInput);
10862}
10863
10864// Function used to add the attribute. The parameter `VLEN` is
10865// templated to allow the use of "x" when targeting scalable functions
10866// for SVE.
10867template <typename T>
10868static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
10869 char ISA, StringRef ParSeq,
10870 StringRef MangledName, bool OutputBecomesInput,
10871 llvm::Function *Fn) {
10872 SmallString<256> Buffer;
10873 llvm::raw_svector_ostream Out(Buffer);
10874 Out << Prefix << ISA << LMask << VLEN;
10875 if (OutputBecomesInput)
10876 Out << "v";
10877 Out << ParSeq << "_" << MangledName;
10878 Fn->addFnAttr(Kind: Out.str());
10879}
10880
10881// Helper function to generate the Advanced SIMD names depending on
10882// the value of the NDS when simdlen is not present.
10883static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
10884 StringRef Prefix, char ISA,
10885 StringRef ParSeq, StringRef MangledName,
10886 bool OutputBecomesInput,
10887 llvm::Function *Fn) {
10888 switch (NDS) {
10889 case 8:
10890 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10891 OutputBecomesInput, Fn);
10892 addAArch64VectorName(VLEN: 16, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10893 OutputBecomesInput, Fn);
10894 break;
10895 case 16:
10896 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10897 OutputBecomesInput, Fn);
10898 addAArch64VectorName(VLEN: 8, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10899 OutputBecomesInput, Fn);
10900 break;
10901 case 32:
10902 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10903 OutputBecomesInput, Fn);
10904 addAArch64VectorName(VLEN: 4, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10905 OutputBecomesInput, Fn);
10906 break;
10907 case 64:
10908 case 128:
10909 addAArch64VectorName(VLEN: 2, LMask: Mask, Prefix, ISA, ParSeq, MangledName,
10910 OutputBecomesInput, Fn);
10911 break;
10912 default:
10913 llvm_unreachable("Scalar type is too wide.");
10914 }
10915}
10916
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
/// \p ISA is 's' for SVE and 'n' for Advanced SIMD (NEON). Invalid user
/// 'simdlen' values are diagnosed with a warning and no attribute is emitted.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(t: Data);
  const unsigned WDS = std::get<1>(t: Data);
  const bool OutputBecomesInput = std::get<2>(t: Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures,
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        L: DiagnosticsEngine::Warning,
        FormatString: "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(Loc: SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(Value: UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        L: DiagnosticsEngine::Warning, FormatString: "The value specified in simdlen must be a "
                                     "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(Loc: SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengh must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          L: DiagnosticsEngine::Warning, FormatString: "The clause simdlen must fit the %0-bit "
                                       "lanes in the architectural constraints "
                                       "for SVE (min is 128-bit, max is "
                                       "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(Loc: SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        // No branch-state clause: emit both unmasked ("N") and masked ("M").
        addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(VLEN: UserVLEN, LMask: "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(VLEN: UserVLEN, LMask: "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName(VLEN: "x", LMask: "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, Mask: "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, Mask: "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
11025
/// For every 'declare simd' attribute on (any redeclaration of) \p FD,
/// classify the parameters from the uniform/aligned/linear clauses and emit
/// the target-specific vector-variant mangled attributes on \p Fn
/// (x86 and AArch64 only).
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  // Walk the whole redeclaration chain, newest first, so attributes on any
  // declaration are honored.
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    // For methods, slot 0 (keyed by FD itself) stands for the implicit
    // 'this' parameter.
    if (isa<CXXMethodDecl>(Val: FD))
      ParamPositions.try_emplace(Key: FD, Args: 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(Key: P->getCanonicalDecl(), Args&: ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      // One entry per parameter (plus 'this'); defaults to Vector kind.
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      // NI iterates in lockstep with aligneds(): the alignment expression
      // for each aligned parameter (may be null, meaning "use the default").
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        // Explicit alignment if given, otherwise the default OpenMP SIMD
        // alignment for the parameter type (converted from bits to bytes).
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(Ctx: C)
                : llvm::APSInt::getUnsigned(
                      X: C.toCharUnitsFromBits(BitSize: C.getOpenMPDefaultSimdAlign(T: ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      // SI/MI iterate in lockstep with linears(): step expression and
      // linear modifier (ref/uval/val) for each linear parameter.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(Val: E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(Val: E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(T: P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(Val: PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(Val: PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(T: P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(T: PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: 1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, Ctx: C, AllowSideEffects: Expr::SE_AllowSideEffects)) {
            // Non-constant step: it must name another parameter; record its
            // position and mark the stride as variable.
            if (const auto *DRE =
                    cast<DeclRefExpr>(Val: (*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(Val: DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(Val: StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(X: It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      // Evaluate the simdlen clause (if present) to a constant.
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(Ctx: C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      // Dispatch to the target-specific mangler: x86, or AArch64 with SVE
      // preferred over NEON when available.
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature(Feature: "sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
                                         MangledName, ISA: 's', VecRegSize: 128, Fn, SLoc: ExprLoc);
        else if (CGM.getTarget().hasFeature(Feature: "neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, UserVLEN: VLEN, ParamAttrs, State,
                                         MangledName, ISA: 'n', VecRegSize: 128, Fn, SLoc: ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
11178
namespace {
/// Cleanup action for doacross support.
/// Stores a runtime finalization function and its two arguments at push time
/// and emits the call when the cleanup scope is exited.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  /// Number of arguments the finalization call takes.
  static const int DoacrossFinArgs = 2;

private:
  // Finalization runtime entry point to call on scope exit.
  llvm::FunctionCallee RTLFn;
  // Arguments captured by value, since the cleanup may be emitted after the
  // original values' scope.
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(first: CallArgs.begin(), last: CallArgs.end(), result: std::begin(arr&: Args));
  }
  /// Emit the stored finalization call (skipped if the block is already
  /// terminated).
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
  }
};
} // namespace
11203
/// Emit the prologue of a doacross loop nest: describe each collapsed loop
/// dimension to the runtime via __kmpc_doacross_init and register a cleanup
/// that calls __kmpc_doacross_fini on region exit.
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    // The record type is built once and cached in KmpDimTy.
    RD = C.buildImplicitRecord(Name: "kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    addFieldToRecordDecl(C, DC: RD, FieldTy: Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(Decl: RD);
  } else {
    RD = cast<RecordDecl>(Val: KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(EltTy: KmpDimTy, ArySize: Size, SizeExpr: nullptr,
                                            ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);

  // One kmp_dim descriptor per collapsed loop, zero-initialized so 'lo'
  // stays 0; only 'up' and 'st' are written below.
  Address DimsAddr = CGF.CreateMemTemp(T: ArrayTy, Name: "dims");
  CGF.EmitNullInitialization(DestPtr: DimsAddr, Ty: ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        Addr: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: I), T: KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: UpperFD));
    // Widen the iteration count to kmp_int64 as the runtime expects.
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: NumIterations[I]), SrcTy: NumIterations[I]->getType(),
        DstTy: Int64Ty, Loc: NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(value: NumIterVal, lvalue: UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        Base: DimsLVal, Field: *std::next(x: RD->field_begin(), n: StrideFD));
    CGF.EmitStoreOfScalar(value: llvm::ConstantInt::getSigned(Ty: CGM.Int64Ty, /*V=*/1),
                          lvalue: StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc: D.getBeginLoc()),
      getThreadID(CGF, Loc: D.getBeginLoc()),
      llvm::ConstantInt::getSigned(Ty: CGM.Int32Ty, V: NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          V: CGF.Builder.CreateConstArrayGEP(Addr: DimsAddr, Index: 0).emitRawPointer(CGF),
          DestTy: CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
  // Pair the init with a __kmpc_doacross_fini call on both the normal and
  // the EH exit path; arguments use the directive's end location.
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, Loc: D.getEndLoc()), getThreadID(CGF, Loc: D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      M&: CGM.getModule(), FnID: OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(Kind: NormalAndEHCleanup, A: FiniRTLFn,
                                             A: llvm::ArrayRef(FiniArgs));
}
11274
/// Shared implementation for 'ordered' with a depend/doacross clause:
/// materializes the loop-counter vector (widened to kmp_int64) and calls
/// __kmpc_doacross_post for a source clause or __kmpc_doacross_wait for a
/// sink clause. T is OMPDependClause or OMPDoacrossClause.
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      EltTy: Int64Ty, ArySize: Size, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, IndexTypeQuals: 0);
  // Store each loop's counter expression value into a local array that is
  // handed to the runtime by pointer.
  Address CntAddr = CGF.CreateMemTemp(T: ArrayTy, Name: ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
        Loc: CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(Value: CntVal, Addr: CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: I),
                          /*Volatile=*/false, Ty: Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(Addr: CntAddr, Index: 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    // 'source': signal that this iteration's data is ready.
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_post);
  } else {
    // 'sink': wait until the named iteration has posted.
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(),
                                                  FnID: OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
}
11310
11311void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11312 const OMPDependClause *C) {
11313 return EmitDoacrossOrdered<OMPDependClause>(
11314 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11315 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
11316}
11317
11318void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
11319 const OMPDoacrossClause *C) {
11320 return EmitDoacrossOrdered<OMPDoacrossClause>(
11321 CGF, CGM, C, ULoc: emitUpdateLocation(CGF, Loc: C->getBeginLoc()),
11322 ThreadID: getThreadID(CGF, Loc: C->getBeginLoc()));
11323}
11324
11325void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
11326 llvm::FunctionCallee Callee,
11327 ArrayRef<llvm::Value *> Args) const {
11328 assert(Loc.isValid() && "Outlined function call location must be valid.");
11329 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: Loc);
11330
11331 if (auto *Fn = dyn_cast<llvm::Function>(Val: Callee.getCallee())) {
11332 if (Fn->doesNotThrow()) {
11333 CGF.EmitNounwindRuntimeCall(callee: Fn, args: Args);
11334 return;
11335 }
11336 }
11337 CGF.EmitRuntimeCall(callee: Callee, args: Args);
11338}
11339
11340void CGOpenMPRuntime::emitOutlinedFunctionCall(
11341 CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
11342 ArrayRef<llvm::Value *> Args) const {
11343 emitCall(CGF, Loc, Callee: OutlinedFn, Args);
11344}
11345
11346void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
11347 if (const auto *FD = dyn_cast<FunctionDecl>(Val: D))
11348 if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD: FD))
11349 HasEmittedDeclareTargetRegion = true;
11350}
11351
11352Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
11353 const VarDecl *NativeParam,
11354 const VarDecl *TargetParam) const {
11355 return CGF.GetAddrOfLocalVar(VD: NativeParam);
11356}
11357
11358/// Return allocator value from expression, or return a null allocator (default
11359/// when no allocator specified).
11360static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
11361 const Expr *Allocator) {
11362 llvm::Value *AllocVal;
11363 if (Allocator) {
11364 AllocVal = CGF.EmitScalarExpr(E: Allocator);
11365 // According to the standard, the original allocator type is a enum
11366 // (integer). Convert to pointer type, if required.
11367 AllocVal = CGF.EmitScalarConversion(Src: AllocVal, SrcTy: Allocator->getType(),
11368 DstTy: CGF.getContext().VoidPtrTy,
11369 Loc: Allocator->getExprLoc());
11370 } else {
11371 // If no allocator specified, it defaults to the null allocator.
11372 AllocVal = llvm::Constant::getNullValue(
11373 Ty: CGF.CGM.getTypes().ConvertType(T: CGF.getContext().VoidPtrTy));
11374 }
11375 return AllocVal;
11376}
11377
11378/// Return the alignment from an allocate directive if present.
11379static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
11380 std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);
11381
11382 if (!AllocateAlignment)
11383 return nullptr;
11384
11385 return llvm::ConstantInt::get(Ty: CGM.SizeTy, V: AllocateAlignment->getQuantity());
11386}
11387
/// Returns the address of a local variable with OpenMP-specific handling:
/// variables carrying an OMPAllocateDeclAttr are allocated via
/// __kmpc_alloc/__kmpc_aligned_alloc (freed via __kmpc_free on scope exit),
/// and variables local to an untied task are resolved through the
/// untied-task local-variable map. Returns Address::invalid() when the
/// default local allocation should be used.
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  // If the current function is an untied task body, the variable may have
  // pre-recorded addresses in the untied locals map.
  auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(Key: VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(D: CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      // VLA-like types: the size is only known at runtime.
      Size = CGF.getTypeSize(Ty: CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
      Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
      Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
    } else {
      // Statically-sized type: compute the aligned size at compile time.
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(T: CVD->getType());
      Size = CGM.getSize(numChars: Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, Loc: CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, VD: CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(Elt: ThreadID);
    // __kmpc_aligned_alloc takes an extra alignment argument before size.
    if (Alignment)
      Args.push_back(Elt: Alignment);
    Args.push_back(Elt: Size);
    Args.push_back(Elt: AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        callee: OMPBuilder.getOrCreateRuntimeFunction(M&: CGM.getModule(), FnID), args: Args,
        name: getName(Parts: {CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        M&: CGM.getModule(), FnID: OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(T: CVD->getType());
    // Cast the raw void* returned by the runtime to a pointer to the
    // variable's type.
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        V: Addr, DestTy: CGF.ConvertTypeForMem(T: Ty), Name: getName(Parts: {CVD->getName(), ".addr"}));
    // For untied tasks, persist the allocated pointer in the task-local slot.
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Value: Addr, Addr: UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    // Calls __kmpc_free(gtid, addr, allocator) when the variable's scope is
    // exited; the source location is kept as a raw encoding and the thread
    // id is re-computed at cleanup-emission time.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, Loc: SourceLocation::getFromRawEncoding(Encoding: LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            V: Addr.emitRawPointer(CGF), DestTy: CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator: AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(callee: RTLFn, args: Args);
      }
    };
    // For untied tasks report the persisted real address; otherwise wrap the
    // freshly-allocated pointer with the declared alignment.
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        Kind: NormalAndEHCleanup, A: FiniRTLFn, A: CVD->getLocation().getRawEncoding(),
        A: VDAddr, A: Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(Val: CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
11487
11488bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
11489 const VarDecl *VD) const {
11490 auto It = FunctionToUntiedTaskStackMap.find(Val: CGF.CurFn);
11491 if (It == FunctionToUntiedTaskStackMap.end())
11492 return false;
11493 return UntiedLocalVarsStack[It->second].count(Key: VD) > 0;
11494}
11495
/// RAII scope for nontemporal declarations: collects every decl named in the
/// directive's 'nontemporal' clauses into a fresh set pushed onto
/// NontemporalDeclsStack (popped by the destructor).
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  // Nothing to push when the directive has no nontemporal clauses.
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Val: Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(Val: SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        // Otherwise the reference must be a member of the current class,
        // accessed through an implicit or explicit 'this'.
        const auto *ME = cast<MemberExpr>(Val: SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(V: VD);
    }
  }
}
11521
11522CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
11523 if (!NeedToPush)
11524 return;
11525 CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
11526}
11527
11528CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
11529 CodeGenFunction &CGF,
11530 const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
11531 std::pair<Address, Address>> &LocalVars)
11532 : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
11533 if (!NeedToPush)
11534 return;
11535 CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
11536 Key: CGF.CurFn, Args: CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
11537 CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(Elt: LocalVars);
11538}
11539
11540CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
11541 if (!NeedToPush)
11542 return;
11543 CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
11544}
11545
11546bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
11547 assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11548
11549 return llvm::any_of(
11550 Range&: CGM.getOpenMPRuntime().NontemporalDeclsStack,
11551 P: [VD](const NontemporalDeclsSet &Set) { return Set.contains(V: VD); });
11552}
11553
11554void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
11555 const OMPExecutableDirective &S,
11556 llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
11557 const {
11558 llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
11559 // Vars in target/task regions must be excluded completely.
11560 if (isOpenMPTargetExecutionDirective(DKind: S.getDirectiveKind()) ||
11561 isOpenMPTaskingDirective(Kind: S.getDirectiveKind())) {
11562 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11563 getOpenMPCaptureRegions(CaptureRegions, DKind: S.getDirectiveKind());
11564 const CapturedStmt *CS = S.getCapturedStmt(RegionKind: CaptureRegions.front());
11565 for (const CapturedStmt::Capture &Cap : CS->captures()) {
11566 if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
11567 NeedToCheckForLPCs.insert(V: Cap.getCapturedVar());
11568 }
11569 }
11570 // Exclude vars in private clauses.
11571 for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
11572 for (const Expr *Ref : C->varlist()) {
11573 if (!Ref->getType()->isScalarType())
11574 continue;
11575 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11576 if (!DRE)
11577 continue;
11578 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11579 }
11580 }
11581 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
11582 for (const Expr *Ref : C->varlist()) {
11583 if (!Ref->getType()->isScalarType())
11584 continue;
11585 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11586 if (!DRE)
11587 continue;
11588 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11589 }
11590 }
11591 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11592 for (const Expr *Ref : C->varlist()) {
11593 if (!Ref->getType()->isScalarType())
11594 continue;
11595 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11596 if (!DRE)
11597 continue;
11598 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11599 }
11600 }
11601 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
11602 for (const Expr *Ref : C->varlist()) {
11603 if (!Ref->getType()->isScalarType())
11604 continue;
11605 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11606 if (!DRE)
11607 continue;
11608 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11609 }
11610 }
11611 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
11612 for (const Expr *Ref : C->varlist()) {
11613 if (!Ref->getType()->isScalarType())
11614 continue;
11615 const auto *DRE = dyn_cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts());
11616 if (!DRE)
11617 continue;
11618 NeedToCheckForLPCs.insert(V: DRE->getDecl());
11619 }
11620 }
11621 for (const Decl *VD : NeedToCheckForLPCs) {
11622 for (const LastprivateConditionalData &Data :
11623 llvm::reverse(C&: CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
11624 if (Data.DeclToUniqueName.count(Key: VD) > 0) {
11625 if (!Data.Disabled)
11626 NeedToAddForLPCsAsDisabled.insert(V: VD);
11627 break;
11628 }
11629 }
11630 }
11631}
11632
/// Push-variant RAII: if the directive has any 'lastprivate(conditional: ...)'
/// clause (OpenMP >= 5.0), registers each such decl, keyed by a unique
/// "pl_cond" name, on the lastprivate-conditional stack together with the
/// loop IV lvalue and the current function.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      // Push only when at least one lastprivate clause carries the
      // 'conditional' modifier.
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(Range: S.getClausesOfKind<OMPLastprivateClause>(),
                           P: [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    // Map each conditional-lastprivate decl to a unique name used for the
    // global last-value/last-iv variables.
    for (const Expr *Ref : C->varlist()) {
      Data.DeclToUniqueName.insert(KV: std::make_pair(
          x: cast<DeclRefExpr>(Val: Ref->IgnoreParenImpCasts())->getDecl(),
          y: SmallString<16>(generateUniqueName(CGM, Prefix: "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
11664
/// Disable-variant RAII (used via disable()): if the directive privatizes or
/// captures vars tracked as lastprivate conditional by enclosing regions,
/// pushes a 'Disabled' entry naming them so inner analysis skips them.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    // Only the decl keys matter for a disabled entry; the unique names stay
    // empty.
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.try_emplace(Key: VD);
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
11683
11684CGOpenMPRuntime::LastprivateConditionalRAII
11685CGOpenMPRuntime::LastprivateConditionalRAII::disable(
11686 CodeGenFunction &CGF, const OMPExecutableDirective &S) {
11687 return LastprivateConditionalRAII(CGF, S);
11688}
11689
11690CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11691 if (CGM.getLangOpts().OpenMP < 50)
11692 return;
11693 if (Action == ActionToDo::DisableLastprivateConditional) {
11694 assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11695 "Expected list of disabled private vars.");
11696 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11697 }
11698 if (Action == ActionToDo::PushAsLastprivateConditional) {
11699 assert(
11700 !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
11701 "Expected list of lastprivate conditional vars.");
11702 CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11703 }
11704}
11705
/// Creates (or reuses) the private copy for a lastprivate-conditional var as
/// an implicit struct { <var type> value; char Fired; }; 'Fired' records
/// whether the private copy was written in an inner region. The Fired flag is
/// reset to 0 and the address of the value field is returned.
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(Key: CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(Val: VD);
  if (VI == I->getSecond().end()) {
    // First use in this function: build the wrapper record, allocate the
    // temporary, and cache {type, fields, base lvalue} for later lookups.
    // NOTE(review): "lasprivate" looks like a typo for "lastprivate" in the
    // implicit record name; kept as-is since the name can surface in emitted
    // IR/type names.
    RecordDecl *RD = C.buildImplicitRecord(Name: "lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, DC: RD, FieldTy: VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, DC: RD, FieldTy: C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(Decl: RD);
    Address Addr = CGF.CreateMemTemp(T: NewType, Align: C.getDeclAlign(D: VD), Name: VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, T: NewType, Source: AlignmentSource::Decl);
    I->getSecond().try_emplace(Key: VD, Args&: NewType, Args&: VDField, Args&: FiredField, Args&: BaseLVal);
  } else {
    // Reuse the cached record info for this variable.
    NewType = std::get<0>(t&: VI->getSecond());
    VDField = std::get<1>(t&: VI->getSecond());
    FiredField = std::get<2>(t&: VI->getSecond());
    BaseLVal = std::get<3>(t&: VI->getSecond());
  }
  // Fired = 0; (not written yet).
  LValue FiredLVal =
      CGF.EmitLValueForField(Base: BaseLVal, Field: FiredField);
  CGF.EmitStoreOfScalar(
      value: llvm::ConstantInt::getNullValue(Ty: CGF.ConvertTypeForMem(T: C.CharTy)),
      lvalue: FiredLVal);
  return CGF.EmitLValueForField(Base: BaseLVal, Field: VDField).getAddress();
}
11738
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
/// On a match, the matched expression, its canonical decl, the unique name,
/// the loop IV lvalue and the owning function of the innermost enabled
/// region are captured and exposed via getFoundData().
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  // NOTE(review): never assigned or read within this class.
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    // Search innermost-to-outermost; a 'Disabled' entry that mentions the
    // decl shadows any outer enabled entry.
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    // Only members accessed through (possibly wrapped) 'this' are tracked.
    if (!CodeGenFunction::IsWrappedCXXThis(E: E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(C&: LPM)) {
      auto It = D.DeclToUniqueName.find(Key: E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    // Recurse into children; only glvalue sub-expressions can name the
    // variable being written, so rvalue children are skipped.
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Val: Child))
        if (!E->isGLValue())
          continue;
      if (Visit(S: Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  /// Returns (expr, decl, unique name, IV lvalue, owning function) of the
  /// last successful match; null/empty values when nothing matched.
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(args: FoundE, args: FoundD, args: UniqueDeclName, args: IVLVal, args: FoundFn);
  }
};
} // namespace
11809
/// Emits the guarded update of the lastprivate-conditional globals: inside a
/// critical section (named by the unique decl name), if the global last-iv is
/// <= the current iteration value, both the last-iv and the global last-value
/// are overwritten from the private copy.
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(T: IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      Ty: LLIVTy, Name: getName(Parts: {UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(Val: LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(V: LastIV, T: IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      Ty: CGF.ConvertTypeForMem(T: LVal.getType()), Name: UniqueDeclName);
  cast<llvm::GlobalVariable>(Val: Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(V: Last, T: LVal.getType(), Alignment: LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(lvalue: IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(lvalue: LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    // Use the signedness of the IV type for the comparison.
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LHS: LastIVVal, RHS: IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LHS: LastIVVal, RHS: IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "lp_cond_exit");
    CGF.Builder.CreateCondBr(Cond: CmpRes, True: ThenBB, False: ExitBB);
    // {
    CGF.EmitBlock(BB: ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(value: IVVal, lvalue: LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(T: LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(lvalue: LVal, Loc);
      CGF.EmitStoreOfScalar(value: PrivVal, lvalue: LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(src: LVal, loc: Loc);
      CGF.EmitStoreOfComplex(V: PrivVal, dest: LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(Block: ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, CriticalName: UniqueDeclName, CriticalOpGen: CodeGen, Loc);
  }
}
11897
/// If \p LHS (the left-hand side of an assignment being emitted) references a
/// tracked lastprivate-conditional variable, emits the corresponding update:
/// either the guarded global update (same function) or, for inner parallel
/// regions, an atomic store of 1 to the private copy's 'Fired' flag.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  // Lastprivate conditional requires OpenMP >= 5.0 and an active region.
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(S: LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(args&: FoundE, args&: FoundD, args&: UniqueDeclName, args&: IVLVal, args&: FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(Val: FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(t&: It->getSecond());
    const FieldDecl* FiredDecl = std::get<2>(t&: It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(E: FoundE);
    // Reinterpret the private copy's address as the wrapper struct built by
    // emitLastprivateConditionalInit to reach its Fired flag.
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr: PrivLVal.getAddress(),
        Ty: CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: StructTy)),
        ElementTy: CGF.ConvertTypeForMem(T: StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(Addr: StructAddr, T: StructTy, Source: AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(Base: BaseLVal, Field: FiredDecl);
    // Atomic store: the flag may be read/written by multiple threads.
    CGF.EmitAtomicStore(rvalue: RValue::get(V: llvm::ConstantInt::get(
                            Ty: CGF.ConvertTypeForMem(T: FiredDecl->getType()), V: 1)),
                        lvalue: FiredLVal, AO: llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(E: FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   Loc: FoundE->getExprLoc());
}
11941
11942void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
11943 CodeGenFunction &CGF, const OMPExecutableDirective &D,
11944 const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
11945 if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
11946 return;
11947 auto Range = llvm::reverse(C&: LastprivateConditionalStack);
11948 auto It = llvm::find_if(
11949 Range, P: [](const LastprivateConditionalData &D) { return !D.Disabled; });
11950 if (It == Range.end() || It->Fn != CGF.CurFn)
11951 return;
11952 auto LPCI = LastprivateConditionalToTypes.find(Val: It->Fn);
11953 assert(LPCI != LastprivateConditionalToTypes.end() &&
11954 "Lastprivates must be registered already.");
11955 SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
11956 getOpenMPCaptureRegions(CaptureRegions, DKind: D.getDirectiveKind());
11957 const CapturedStmt *CS = D.getCapturedStmt(RegionKind: CaptureRegions.back());
11958 for (const auto &Pair : It->DeclToUniqueName) {
11959 const auto *VD = cast<VarDecl>(Val: Pair.first->getCanonicalDecl());
11960 if (!CS->capturesVariable(Var: VD) || IgnoredDecls.contains(V: VD))
11961 continue;
11962 auto I = LPCI->getSecond().find(Val: Pair.first);
11963 assert(I != LPCI->getSecond().end() &&
11964 "Lastprivate must be rehistered already.");
11965 // bool Cmp = priv_a.Fired != 0;
11966 LValue BaseLVal = std::get<3>(t&: I->getSecond());
11967 LValue FiredLVal =
11968 CGF.EmitLValueForField(Base: BaseLVal, Field: std::get<2>(t&: I->getSecond()));
11969 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: FiredLVal, Loc: D.getBeginLoc());
11970 llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Arg: Res);
11971 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: "lpc.then");
11972 llvm::BasicBlock *DoneBB = CGF.createBasicBlock(name: "lpc.done");
11973 // if (Cmp) {
11974 CGF.Builder.CreateCondBr(Cond: Cmp, True: ThenBB, False: DoneBB);
11975 CGF.EmitBlock(BB: ThenBB);
11976 Address Addr = CGF.GetAddrOfLocalVar(VD);
11977 LValue LVal;
11978 if (VD->getType()->isReferenceType())
11979 LVal = CGF.EmitLoadOfReferenceLValue(RefAddr: Addr, RefTy: VD->getType(),
11980 Source: AlignmentSource::Decl);
11981 else
11982 LVal = CGF.MakeAddrLValue(Addr, T: VD->getType().getNonReferenceType(),
11983 Source: AlignmentSource::Decl);
11984 emitLastprivateConditionalUpdate(CGF, IVLVal: It->IVLVal, UniqueDeclName: Pair.second, LVal,
11985 Loc: D.getBeginLoc());
11986 auto AL = ApplyDebugLocation::CreateArtificial(CGF);
11987 CGF.EmitBlock(BB: DoneBB, /*IsFinal=*/IsFinished: true);
11988 // }
11989 }
11990}
11991
11992void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11993 CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11994 SourceLocation Loc) {
11995 if (CGF.getLangOpts().OpenMP < 50)
11996 return;
11997 auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(Key: VD);
11998 assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
11999 "Unknown lastprivate conditional variable.");
12000 StringRef UniqueName = It->second;
12001 llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(Name: UniqueName);
12002 // The variable was not updated in the region - exit.
12003 if (!GV)
12004 return;
12005 LValue LPLVal = CGF.MakeRawAddrLValue(
12006 V: GV, T: PrivLVal.getType().getNonReferenceType(), Alignment: PrivLVal.getAlignment());
12007 llvm::Value *Res = CGF.EmitLoadOfScalar(lvalue: LPLVal, Loc);
12008 CGF.EmitStoreOfScalar(value: Res, lvalue: PrivLVal);
12009}
12010
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: overrides for SIMD-only (-fopenmp-simd) mode.
// None of the entry points below is supported in this mode; each one traps
// via llvm_unreachable if it is ever reached.
//===----------------------------------------------------------------------===//

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12107
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: worksharing-loop and threadprivate stubs. As above,
// these are unsupported in SIMD-only mode and trap if ever reached.
//===----------------------------------------------------------------------===//

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12177
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: task, reduction and cancellation entry points. All
// except emitReduction are unsupported in SIMD-only mode and trap if reached.
//===----------------------------------------------------------------------===//

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// The one reduction shape allowed in SIMD-only mode: the assert restricts
// callers to simple reductions, and the work is delegated to the base-class
// implementation.
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, OrgPrivates: Privates, OrgLHSExprs: LHSExprs, OrgRHSExprs: RHSExprs,
                                 OrgReductionOps: ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12246
//===----------------------------------------------------------------------===//
// CGOpenMPSIMDRuntime: target/teams/data/doacross entry points. All except
// emitTargetGlobal are unsupported in SIMD-only mode and trap if reached.
//===----------------------------------------------------------------------===//

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

// SIMD-only mode performs no target-side handling of globals: always report
// the global as not emitted here.
bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}
12332

// source code of clang/lib/CodeGen/CGOpenMPRuntime.cpp