//===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This contains code to emit OpenMP nodes as LLVM code.
//
//===----------------------------------------------------------------------===//

#include "CGCleanup.h"
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "TargetInfo.h"
#include "clang/AST/ASTContext.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclOpenMP.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/Stmt.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/PrettyStackTrace.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Debug.h"
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

#define TTL_CODEGEN_TYPE "target-teams-loop-codegen"

static const VarDecl *getBaseDecl(const Expr *Ref);
namespace {
/// Lexical scope for OpenMP executable constructs that handles correct codegen
/// for captured expressions.
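///
/// Illustrative example (a sketch, not the exact form Sema produces): for
///   #pragma omp parallel num_threads(NumThreads + 1)
/// the clause expression 'NumThreads + 1' is precomputed by Sema into a
/// helper pre-init variable; emitPreInitStmt() below emits that helper
/// before the construct, so the outlined region refers to a stable value.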
class OMPLexicalScope : public CodeGenFunction::LexicalScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
  }
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPLexicalScope(
      CodeGenFunction &CGF, const OMPExecutableDirective &S,
      const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt,
      const bool EmitPreInitStmt = true)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    if (EmitPreInitStmt)
      emitPreInitStmt(CGF, S);
    if (!CapturedRegion)
      return;
    assert(S.hasAssociatedStmt() &&
           "Expected associated statement for inlined directive.");
    const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion);
    for (const auto &C : CS->captures()) {
      if (C.capturesVariable() || C.capturesVariableByCopy()) {
        auto *VD = C.getCapturedVar();
        assert(VD == VD->getCanonicalDecl() &&
               "Canonical decl must be captured.");
        DeclRefExpr DRE(
            CGF.getContext(), const_cast<VarDecl *>(VD),
            isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo &&
                                       InlinedShareds.isGlobalVarCaptured(VD)),
            VD->getType().getNonReferenceType(), VK_LValue, C.getLocation());
        InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
    }
    (void)InlinedShareds.Privatize();
  }
};

/// Lexical scope for the OpenMP parallel construct that handles correct
/// codegen for captured expressions.
class OMPParallelScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !(isOpenMPTargetExecutionDirective(Kind) ||
             isOpenMPLoopBoundSharingDirective(Kind)) &&
           isOpenMPParallelDirective(Kind);
  }

public:
  OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Lexical scope for the OpenMP teams construct that handles correct codegen
/// for captured expressions.
class OMPTeamsScope final : public OMPLexicalScope {
  bool EmitPreInitStmt(const OMPExecutableDirective &S) {
    OpenMPDirectiveKind Kind = S.getDirectiveKind();
    return !isOpenMPTargetExecutionDirective(Kind) &&
           isOpenMPTeamsDirective(Kind);
  }

public:
  OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt,
                        EmitPreInitStmt(S)) {}
};

/// Private scope for OpenMP loop-based directives that supports capturing
/// of expressions used in the loop statement.
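///
/// For example (an illustrative sketch), for
///   #pragma omp for collapse(2)
/// the directive's pre-init statements hold helper variables for the
/// computed iteration-space bounds of the collapsed loop nest; this scope
/// emits them, plus temporary copies of the loop counters, before any loop
/// precondition is evaluated.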
class OMPLoopScope : public CodeGenFunction::RunCleanupsScope {
  void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) {
    const DeclStmt *PreInits;
    CodeGenFunction::OMPMapVars PreCondVars;
    if (auto *LD = dyn_cast<OMPLoopDirective>(&S)) {
      llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
      for (const auto *E : LD->counters()) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        EmittedAsPrivate.insert(VD->getCanonicalDecl());
        (void)PreCondVars.setVarAddr(
            CGF, VD, CGF.CreateMemTemp(VD->getType().getNonReferenceType()));
      }
      // Mark private vars as undefs.
      for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) {
        for (const Expr *IRef : C->varlists()) {
          const auto *OrigVD =
              cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl());
          if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
            QualType OrigVDTy = OrigVD->getType().getNonReferenceType();
            (void)PreCondVars.setVarAddr(
                CGF, OrigVD,
                Address(llvm::UndefValue::get(CGF.ConvertTypeForMem(
                            CGF.getContext().getPointerType(OrigVDTy))),
                        CGF.ConvertTypeForMem(OrigVDTy),
                        CGF.getContext().getDeclAlign(OrigVD)));
          }
        }
      }
      (void)PreCondVars.apply(CGF);
      // Emit init, __range and __end variables for C++ range loops.
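      // (Illustrative: for 'for (auto &X : Cont)' Sema synthesizes hidden
      // '__range' and '__end' variables whose declarations live outside the
      // loop body; they must be emitted here so the loop bounds can be
      // computed before the construct itself is lowered.)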
      (void)OMPLoopBasedDirective::doForAllLoops(
          LD->getInnermostCapturedStmt()->getCapturedStmt(),
          /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(),
          [&CGF](unsigned Cnt, const Stmt *CurStmt) {
            if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(CurStmt)) {
              if (const Stmt *Init = CXXFor->getInit())
                CGF.EmitStmt(Init);
              CGF.EmitStmt(CXXFor->getRangeStmt());
              CGF.EmitStmt(CXXFor->getEndStmt());
            }
            return false;
          });
      PreInits = cast_or_null<DeclStmt>(LD->getPreInits());
    } else if (const auto *Tile = dyn_cast<OMPTileDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Tile->getPreInits());
    } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(&S)) {
      PreInits = cast_or_null<DeclStmt>(Unroll->getPreInits());
    } else {
      llvm_unreachable("Unknown loop-based directive kind.");
    }
    if (PreInits) {
      for (const auto *I : PreInits->decls())
        CGF.EmitVarDecl(cast<VarDecl>(*I));
    }
    PreCondVars.restore(CGF);
  }

public:
  OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S)
      : CodeGenFunction::RunCleanupsScope(CGF) {
    emitPreInitStmt(CGF, S);
  }
};

class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope {
  CodeGenFunction::OMPPrivateScope InlinedShareds;

  static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) {
    return CGF.LambdaCaptureFields.lookup(VD) ||
           (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) ||
           (CGF.CurCodeDecl && isa<BlockDecl>(CGF.CurCodeDecl) &&
            cast<BlockDecl>(CGF.CurCodeDecl)->capturesVariable(VD));
  }

public:
  OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S)
      : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()),
        InlinedShareds(CGF) {
    for (const auto *C : S.clauses()) {
      if (const auto *CPI = OMPClauseWithPreInit::get(C)) {
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(CPI->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = cast<DeclRefExpr>(E)->getDecl();
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(C)) {
        for (const Expr *E : UDP->varlists()) {
          const Decl *D = getBaseDecl(E);
          if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
            CGF.EmitVarDecl(*OED);
        }
      }
    }
    if (!isOpenMPSimdDirective(S.getDirectiveKind()))
      CGF.EmitOMPPrivateClause(S, InlinedShareds);
    if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(&S)) {
      if (const Expr *E = TG->getReductionRef())
        CGF.EmitVarDecl(*cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()));
    }
    // Temp copy arrays for inscan reductions should not be emitted as they
    // are not used in simd-only mode.
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps;
    for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
      if (C->getModifier() != OMPC_REDUCTION_inscan)
        continue;
      for (const Expr *E : C->copy_array_temps())
        CopyArrayTemps.insert(cast<DeclRefExpr>(E)->getDecl());
    }
    const auto *CS = cast_or_null<CapturedStmt>(S.getAssociatedStmt());
    while (CS) {
      for (auto &C : CS->captures()) {
        if (C.capturesVariable() || C.capturesVariableByCopy()) {
          auto *VD = C.getCapturedVar();
          if (CopyArrayTemps.contains(VD))
            continue;
          assert(VD == VD->getCanonicalDecl() &&
                 "Canonical decl must be captured.");
          DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                          isCapturedVar(CGF, VD) ||
                              (CGF.CapturedStmtInfo &&
                               InlinedShareds.isGlobalVarCaptured(VD)),
                          VD->getType().getNonReferenceType(), VK_LValue,
                          C.getLocation());
          InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
        }
      }
      CS = dyn_cast<CapturedStmt>(CS->getCapturedStmt());
    }
    (void)InlinedShareds.Privatize();
  }
};

} // namespace

static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
                                         const OMPExecutableDirective &S,
                                         const RegionCodeGenTy &CodeGen);

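// Emits an LValue for an expression that may refer to a variable shared
// with (captured by) an enclosing OpenMP region. The intent, in sketch
// form: the DeclRefExpr is rebuilt with the "refers to enclosing variable
// or capture" bit set whenever the variable is a lambda, captured-statement
// or block capture, so EmitLValue resolves it to the captured copy rather
// than the original storage.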
LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) {
  if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(E)) {
    if (const auto *OrigVD = dyn_cast<VarDecl>(OrigDRE->getDecl())) {
      OrigVD = OrigVD->getCanonicalDecl();
      bool IsCaptured =
          LambdaCaptureFields.lookup(OrigVD) ||
          (CapturedStmtInfo && CapturedStmtInfo->lookup(OrigVD)) ||
          (CurCodeDecl && isa<BlockDecl>(CurCodeDecl));
      DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured,
                      OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc());
      return EmitLValue(&DRE);
    }
  }
  return EmitLValue(E);
}

llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) {
  ASTContext &C = getContext();
  llvm::Value *Size = nullptr;
  auto SizeInChars = C.getTypeSizeInChars(Ty);
  if (SizeInChars.isZero()) {
    // getTypeSizeInChars() returns 0 for a VLA.
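    // (Illustrative: for 'double A[N][M]' the loop below peels off the two
    // variable dimensions, accumulating Size = N * M with NUW multiplies,
    // and leaves Ty == double, so the result is N * M * sizeof(double).)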
    while (const VariableArrayType *VAT = C.getAsVariableArrayType(Ty)) {
      VlaSizePair VlaSize = getVLASize(VAT);
      Ty = VlaSize.Type;
      Size = Size ? Builder.CreateNUWMul(Size, VlaSize.NumElts)
                  : VlaSize.NumElts;
    }
    SizeInChars = C.getTypeSizeInChars(Ty);
    if (SizeInChars.isZero())
      return llvm::ConstantInt::get(SizeTy, /*V=*/0);
    return Builder.CreateNUWMul(Size, CGM.getSize(SizeInChars));
  }
  return CGM.getSize(SizeInChars);
}

void CodeGenFunction::GenerateOpenMPCapturedVars(
    const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) {
  const RecordDecl *RD = S.getCapturedRecordDecl();
  auto CurField = RD->field_begin();
  auto CurCap = S.captures().begin();
  for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(),
                                                 E = S.capture_init_end();
       I != E; ++I, ++CurField, ++CurCap) {
    if (CurField->hasCapturedVLAType()) {
      const VariableArrayType *VAT = CurField->getCapturedVLAType();
      llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()];
      CapturedVars.push_back(Val);
    } else if (CurCap->capturesThis()) {
      CapturedVars.push_back(CXXThisValue);
    } else if (CurCap->capturesVariableByCopy()) {
      llvm::Value *CV = EmitLoadOfScalar(EmitLValue(*I), CurCap->getLocation());

      // If the field is not a pointer, we need to save the actual value
      // and load it as a void pointer.
      if (!CurField->getType()->isAnyPointerType()) {
        ASTContext &Ctx = getContext();
        Address DstAddr = CreateMemTemp(
            Ctx.getUIntPtrType(),
            Twine(CurCap->getCapturedVar()->getName(), ".casted"));
        LValue DstLV = MakeAddrLValue(DstAddr, Ctx.getUIntPtrType());

        llvm::Value *SrcAddrVal = EmitScalarConversion(
            DstAddr.emitRawPointer(*this),
            Ctx.getPointerType(Ctx.getUIntPtrType()),
            Ctx.getPointerType(CurField->getType()), CurCap->getLocation());
        LValue SrcLV =
            MakeNaturalAlignAddrLValue(SrcAddrVal, CurField->getType());

        // Store the value using the source type pointer.
        EmitStoreThroughLValue(RValue::get(CV), SrcLV);

        // Load the value using the destination type pointer.
        CV = EmitLoadOfScalar(DstLV, CurCap->getLocation());
      }
      CapturedVars.push_back(CV);
    } else {
      assert(CurCap->capturesVariable() && "Expected capture by reference.");
      CapturedVars.push_back(
          EmitLValue(*I).getAddress(*this).emitRawPointer(*this));
    }
  }
}

static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc,
                                    QualType DstType, StringRef Name,
                                    LValue AddrLV) {
  ASTContext &Ctx = CGF.getContext();

  llvm::Value *CastedPtr = CGF.EmitScalarConversion(
      AddrLV.getAddress(CGF).emitRawPointer(CGF), Ctx.getUIntPtrType(),
      Ctx.getPointerType(DstType), Loc);
  // FIXME: should the pointee type (DstType) be passed?
  Address TmpAddr =
      CGF.MakeNaturalAlignAddrLValue(CastedPtr, DstType).getAddress(CGF);
  return TmpAddr;
}

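// Computes the parameter type to use when passing a captured entity to an
// outlined function, stripping variably-modified array bounds that cannot
// appear in a function prototype. An illustrative example (assuming a
// runtime bound 'n'): a capture of type 'int (*)[n]' is rewritten to
// 'int *'.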
static QualType getCanonicalParamType(ASTContext &C, QualType T) {
  if (T->isLValueReferenceType())
    return C.getLValueReferenceType(
        getCanonicalParamType(C, T.getNonReferenceType()),
        /*SpelledAsLValue=*/false);
  if (T->isPointerType())
    return C.getPointerType(getCanonicalParamType(C, T->getPointeeType()));
  if (const ArrayType *A = T->getAsArrayTypeUnsafe()) {
    if (const auto *VLA = dyn_cast<VariableArrayType>(A))
      return getCanonicalParamType(C, VLA->getElementType());
    if (!A->isVariablyModifiedType())
      return C.getCanonicalType(T);
  }
  return C.getCanonicalParamType(T);
}

namespace {
/// Contains required data for proper outlined function codegen.
struct FunctionOptions {
  /// Captured statement for which the function is generated.
  const CapturedStmt *S = nullptr;
  /// true if cast to/from UIntPtr is required for variables captured by
  /// value.
  const bool UIntPtrCastRequired = true;
  /// true if only casted arguments must be registered as local args or VLA
  /// sizes.
  const bool RegisterCastedArgsOnly = false;
  /// Name of the generated function.
  const StringRef FunctionName;
  /// Location of the non-debug version of the outlined function.
  SourceLocation Loc;
  explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired,
                           bool RegisterCastedArgsOnly, StringRef FunctionName,
                           SourceLocation Loc)
      : S(S), UIntPtrCastRequired(UIntPtrCastRequired),
        RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly),
        FunctionName(FunctionName), Loc(Loc) {}
};
} // namespace

static llvm::Function *emitOutlinedFunctionPrologue(
    CodeGenFunction &CGF, FunctionArgList &Args,
    llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>>
        &LocalAddrs,
    llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>>
        &VLASizes,
    llvm::Value *&CXXThisValue, const FunctionOptions &FO) {
  const CapturedDecl *CD = FO.S->getCapturedDecl();
  const RecordDecl *RD = FO.S->getCapturedRecordDecl();
  assert(CD->hasBody() && "missing CapturedDecl body");

  CXXThisValue = nullptr;
  // Build the argument list.
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGM.getContext();
  FunctionArgList TargetArgs;
  Args.append(CD->param_begin(),
              std::next(CD->param_begin(), CD->getContextParamPosition()));
  TargetArgs.append(
      CD->param_begin(),
      std::next(CD->param_begin(), CD->getContextParamPosition()));
  auto I = FO.S->captures().begin();
  FunctionDecl *DebugFunctionDecl = nullptr;
  if (!FO.UIntPtrCastRequired) {
    FunctionProtoType::ExtProtoInfo EPI;
    QualType FunctionTy = Ctx.getFunctionType(Ctx.VoidTy, std::nullopt, EPI);
    DebugFunctionDecl = FunctionDecl::Create(
        Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(),
        SourceLocation(), DeclarationName(), FunctionTy,
        Ctx.getTrivialTypeSourceInfo(FunctionTy), SC_Static,
        /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false,
        /*hasWrittenPrototype=*/false);
  }
  for (const FieldDecl *FD : RD->fields()) {
    QualType ArgType = FD->getType();
    IdentifierInfo *II = nullptr;
    VarDecl *CapVar = nullptr;

    // If this is a capture by copy and the type is not a pointer, the
    // outlined function argument type should be uintptr and the value
    // properly cast to uintptr. This is necessary given that the runtime
    // library is only able to deal with pointers. We can pass the VLA type
    // sizes to the outlined function in the same way.
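    // (Illustrative: a 'float' captured by copy is received here as a
    // 'uintptr_t' parameter; GenerateOpenMPCapturedVars() packs the float
    // into a uintptr-sized temporary on the caller side, and
    // castValueFromUintptr() reverses the conversion in the callee.)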
    if (FO.UIntPtrCastRequired &&
        ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) ||
         I->capturesVariableArrayType()))
      ArgType = Ctx.getUIntPtrType();

    if (I->capturesVariable() || I->capturesVariableByCopy()) {
      CapVar = I->getCapturedVar();
      II = CapVar->getIdentifier();
    } else if (I->capturesThis()) {
      II = &Ctx.Idents.get("this");
    } else {
      assert(I->capturesVariableArrayType());
      II = &Ctx.Idents.get("vla");
    }
    if (ArgType->isVariablyModifiedType())
      ArgType = getCanonicalParamType(Ctx, ArgType);
    VarDecl *Arg;
    if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType,
                                      ImplicitParamKind::ThreadPrivateVar);
    } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) {
      Arg = ParmVarDecl::Create(
          Ctx, DebugFunctionDecl,
          CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(),
          CapVar ? CapVar->getLocation() : FD->getLocation(), II, ArgType,
          /*TInfo=*/nullptr, SC_None, /*DefArg=*/nullptr);
    } else {
      Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(),
                                      II, ArgType, ImplicitParamKind::Other);
    }
    Args.emplace_back(Arg);
    // Do not cast arguments if we emit the function with non-original types.
    TargetArgs.emplace_back(
        FO.UIntPtrCastRequired
            ? Arg
            : CGM.getOpenMPRuntime().translateParameter(FD, Arg));
    ++I;
  }
  Args.append(std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
              CD->param_end());
  TargetArgs.append(
      std::next(CD->param_begin(), CD->getContextParamPosition() + 1),
      CD->param_end());

  // Create the function declaration.
  const CGFunctionInfo &FuncInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs);
  llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo);

  auto *F =
      llvm::Function::Create(FuncLLVMTy, llvm::GlobalValue::InternalLinkage,
                             FO.FunctionName, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(CD, F, FuncInfo);
  if (CD->isNothrow())
    F->setDoesNotThrow();
  F->setDoesNotRecurse();

  // Always inline the outlined function if optimizations are enabled.
  if (CGM.getCodeGenOpts().OptimizationLevel != 0) {
    F->removeFnAttr(llvm::Attribute::NoInline);
    F->addFnAttr(llvm::Attribute::AlwaysInline);
  }

  // Generate the function.
  CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs,
                    FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(),
                    FO.UIntPtrCastRequired ? FO.Loc
                                           : CD->getBody()->getBeginLoc());
  unsigned Cnt = CD->getContextParamPosition();
  I = FO.S->captures().begin();
  for (const FieldDecl *FD : RD->fields()) {
    // Do not map arguments if we emit the function with non-original types.
    Address LocalAddr(Address::invalid());
    if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) {
      LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, Args[Cnt],
                                                             TargetArgs[Cnt]);
    } else {
      LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]);
    }
    // If we are capturing a pointer by copy we don't need to do anything,
    // just use the value that we get from the arguments.
    if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) {
      const VarDecl *CurVD = I->getCapturedVar();
      if (!FO.RegisterCastedArgsOnly)
        LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}});
      ++Cnt;
      ++I;
      continue;
    }

    LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(),
                                        AlignmentSource::Decl);
    if (FD->hasCapturedVLAType()) {
      if (FO.UIntPtrCastRequired) {
        ArgLVal = CGF.MakeAddrLValue(
            castValueFromUintptr(CGF, I->getLocation(), FD->getType(),
                                 Args[Cnt]->getName(), ArgLVal),
            FD->getType(), AlignmentSource::Decl);
      }
      llvm::Value *ExprArg = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      const VariableArrayType *VAT = FD->getCapturedVLAType();
      VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg);
    } else if (I->capturesVariable()) {
      const VarDecl *Var = I->getCapturedVar();
      QualType VarTy = Var->getType();
      Address ArgAddr = ArgLVal.getAddress(CGF);
      if (ArgLVal.getType()->isLValueReferenceType()) {
        ArgAddr = CGF.EmitLoadOfReference(ArgLVal);
      } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) {
        assert(ArgLVal.getType()->isPointerType());
        ArgAddr = CGF.EmitLoadOfPointer(
            ArgAddr, ArgLVal.getType()->castAs<PointerType>());
      }
      if (!FO.RegisterCastedArgsOnly) {
        LocalAddrs.insert(
            {Args[Cnt], {Var, ArgAddr.withAlignment(Ctx.getDeclAlign(Var))}});
      }
    } else if (I->capturesVariableByCopy()) {
      assert(!FD->getType()->isAnyPointerType() &&
             "Not expecting a captured pointer.");
      const VarDecl *Var = I->getCapturedVar();
      LocalAddrs.insert({Args[Cnt],
                         {Var, FO.UIntPtrCastRequired
                                   ? castValueFromUintptr(
                                         CGF, I->getLocation(), FD->getType(),
                                         Args[Cnt]->getName(), ArgLVal)
                                   : ArgLVal.getAddress(CGF)}});
    } else {
      // If 'this' is captured, load it into CXXThisValue.
      assert(I->capturesThis());
      CXXThisValue = CGF.EmitLoadOfScalar(ArgLVal, I->getLocation());
      LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}});
    }
    ++Cnt;
    ++I;
  }

  return F;
}

llvm::Function *
CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S,
                                                    SourceLocation Loc) {
  assert(
      CapturedStmtInfo &&
      "CapturedStmtInfo should be set when generating the captured function");
  const CapturedDecl *CD = S.getCapturedDecl();
  // Build the argument list.
  bool NeedWrapperFunction =
      getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo();
  FunctionArgList Args;
  llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs;
  llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes;
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << CapturedStmtInfo->getHelperName();
  if (NeedWrapperFunction)
    Out << "_debug__";
  FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false,
                     Out.str(), Loc);
  llvm::Function *F = emitOutlinedFunctionPrologue(*this, Args, LocalAddrs,
                                                   VLASizes, CXXThisValue, FO);
  CodeGenFunction::OMPPrivateScope LocalScope(*this);
  for (const auto &LocalAddrPair : LocalAddrs) {
    if (LocalAddrPair.second.first) {
      LocalScope.addPrivate(LocalAddrPair.second.first,
                            LocalAddrPair.second.second);
    }
  }
  (void)LocalScope.Privatize();
  for (const auto &VLASizePair : VLASizes)
    VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second;
  PGO.assignRegionCounters(GlobalDecl(CD), F);
  CapturedStmtInfo->EmitBody(*this, CD->getBody());
  (void)LocalScope.ForceCleanup();
  FinishFunction(CD->getBodyRBrace());
  if (!NeedWrapperFunction)
    return F;

  FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true,
                            /*RegisterCastedArgsOnly=*/true,
                            CapturedStmtInfo->getHelperName(), Loc);
  CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true);
  WrapperCGF.CapturedStmtInfo = CapturedStmtInfo;
  Args.clear();
  LocalAddrs.clear();
  VLASizes.clear();
  llvm::Function *WrapperF =
      emitOutlinedFunctionPrologue(WrapperCGF, Args, LocalAddrs, VLASizes,
                                   WrapperCGF.CXXThisValue, WrapperFO);
  llvm::SmallVector<llvm::Value *, 4> CallArgs;
  auto *PI = F->arg_begin();
  for (const auto *Arg : Args) {
    llvm::Value *CallArg;
    auto I = LocalAddrs.find(Arg);
    if (I != LocalAddrs.end()) {
      LValue LV = WrapperCGF.MakeAddrLValue(
          I->second.second,
          I->second.first ? I->second.first->getType() : Arg->getType(),
          AlignmentSource::Decl);
      if (LV.getType()->isAnyComplexType())
        LV.setAddress(
            LV.getAddress(WrapperCGF).withElementType(PI->getType()));
      CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
    } else {
      auto EI = VLASizes.find(Arg);
      if (EI != VLASizes.end()) {
        CallArg = EI->second.second;
      } else {
        LValue LV =
            WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(Arg),
                                      Arg->getType(), AlignmentSource::Decl);
        CallArg = WrapperCGF.EmitLoadOfScalar(LV, S.getBeginLoc());
      }
    }
    CallArgs.emplace_back(WrapperCGF.EmitFromMemory(CallArg, Arg->getType()));
    ++PI;
  }
  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(WrapperCGF, Loc, F, CallArgs);
  WrapperCGF.FinishFunction();
  return WrapperF;
}

//===----------------------------------------------------------------------===//
// OpenMP Directive Emission
//===----------------------------------------------------------------------===//
void CodeGenFunction::EmitOMPAggregateAssign(
    Address DestAddr, Address SrcAddr, QualType OriginalType,
    const llvm::function_ref<void(Address, Address)> CopyGen) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = emitArrayLength(ArrayTy, ElementTy, DestAddr);
  SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = SrcAddr.emitRawPointer(*this);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(*this);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd = Builder.CreateInBoundsGEP(DestAddr.getElementType(),
                                                   DestBegin, NumElements);

  // The basic structure here is a while-do loop.
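  // Roughly (an illustrative sketch of the emitted IR, names simplified):
  //     %isempty = icmp eq ptr %dest.begin, %dest.end
  //     br i1 %isempty, label %omp.arraycpy.done, label %omp.arraycpy.body
  //   omp.arraycpy.body:
  //     PHIs track the current src/dest elements; copy one element, step
  //     both pointers, and loop until %dest.end is reached.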
  llvm::BasicBlock *BodyBB = createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arraycpy.isempty");
  Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = Builder.GetInsertBlock();
  EmitBlock(BodyBB);

  CharUnits ElementSize = getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI =
      Builder.CreatePHI(SrcBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  SrcElementPHI->addIncoming(SrcBegin, EntryBB);
  Address SrcElementCurrent =
      Address(SrcElementPHI, SrcAddr.getElementType(),
              SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *DestElementPHI = Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CopyGen(DestElementCurrent, SrcElementCurrent);

  // Shift the address forward by one element.
  llvm::Value *DestElementNext =
      Builder.CreateConstGEP1_32(DestAddr.getElementType(), DestElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.dest.element");
  llvm::Value *SrcElementNext =
      Builder.CreateConstGEP1_32(SrcAddr.getElementType(), SrcElementPHI,
                                 /*Idx0=*/1, "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, Builder.GetInsertBlock());
  SrcElementPHI->addIncoming(SrcElementNext, Builder.GetInsertBlock());

  // Done.
  EmitBlock(DoneBB, /*IsFinished=*/true);
}

void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr,
                                  Address SrcAddr, const VarDecl *DestVD,
                                  const VarDecl *SrcVD, const Expr *Copy) {
  if (OriginalType->isArrayType()) {
    const auto *BO = dyn_cast<BinaryOperator>(Copy);
    if (BO && BO->getOpcode() == BO_Assign) {
      // Perform a simple memcpy for simple copying.
      LValue Dest = MakeAddrLValue(DestAddr, OriginalType);
      LValue Src = MakeAddrLValue(SrcAddr, OriginalType);
      EmitAggregateAssign(Dest, Src, OriginalType);
    } else {
      // For arrays with complex element types perform element-by-element
      // copying.
      EmitOMPAggregateAssign(
          DestAddr, SrcAddr, OriginalType,
          [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) {
            // Working with a single array element, so we have to remap the
            // destination and source variables to the corresponding array
            // elements.
            CodeGenFunction::OMPPrivateScope Remap(*this);
            Remap.addPrivate(DestVD, DestElement);
            Remap.addPrivate(SrcVD, SrcElement);
            (void)Remap.Privatize();
            EmitIgnoredExpr(Copy);
          });
    }
  } else {
    // Remap pseudo source variable to private copy.
    CodeGenFunction::OMPPrivateScope Remap(*this);
    Remap.addPrivate(SrcVD, SrcAddr);
    Remap.addPrivate(DestVD, DestAddr);
    (void)Remap.Privatize();
    // Emit copying of the whole variable.
    EmitIgnoredExpr(Copy);
  }
}

bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D,
                                                OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool DeviceConstTarget =
      getLangOpts().OpenMPIsTargetDevice &&
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  bool FirstprivateIsLastprivate = false;
  llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    for (const auto *D : C->varlists())
      Lastprivates.try_emplace(
          cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(),
          C->getKind());
  }
  llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate;
  llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  // Force emission of the firstprivate copy if the directive does not emit an
  // outlined function, e.g. omp for, omp simd, omp distribute.
  bool MustEmitFirstprivateCopy =
      CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown;
  for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
    const auto *IRef = C->varlist_begin();
    const auto *InitsRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      bool ThisFirstprivateIsLastprivate =
          Lastprivates.count(OrigVD->getCanonicalDecl()) > 0;
      const FieldDecl *FD = CapturedStmtInfo->lookup(OrigVD);
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
      if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD &&
          !FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      // Do not emit a copy for firstprivate constant variables in target
      // regions that are captured by reference.
      if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) &&
          FD && FD->getType()->isReferenceType() &&
          (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) {
        EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl());
        ++IRef;
        ++InitsRef;
        continue;
      }
      FirstprivateIsLastprivate =
          FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate;
      if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VDInit =
            cast<VarDecl>(cast<DeclRefExpr>(*InitsRef)->getDecl());
        bool IsRegistered;
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/FD != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        LValue OriginalLVal;
        if (!FD) {
          // Check if the firstprivate variable is just a constant value.
          ConstantEmission CE = tryEmitAsConstant(&DRE);
          if (CE && !CE.isReference()) {
            // Constant value, no need to create a copy.
            ++IRef;
            ++InitsRef;
            continue;
          }
          if (CE && CE.isReference()) {
            OriginalLVal = CE.getReferenceLValue(*this, &DRE);
          } else {
            assert(!CE && "Expected non-constant firstprivate.");
            OriginalLVal = EmitLValue(&DRE);
          }
        } else {
          OriginalLVal = EmitLValue(&DRE);
        }
        QualType Type = VD->getType();
        if (Type->isArrayType()) {
          // Emit VarDecl with copy init for arrays.
          // Get the address of the original variable captured in the current
          // captured region.
          AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
          const Expr *Init = VD->getInit();
          if (!isa<CXXConstructExpr>(Init) || isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            LValue Dest = MakeAddrLValue(Emission.getAllocatedAddress(), Type);
            EmitAggregateAssign(Dest, OriginalLVal, Type);
          } else {
            EmitOMPAggregateAssign(
                Emission.getAllocatedAddress(), OriginalLVal.getAddress(*this),
                Type,
                [this, VDInit, Init](Address DestElement, Address SrcElement) {
                  // Clean up any temporaries needed by the
                  // initialization.
                  RunCleanupsScope InitScope(*this);
                  // Emit initialization for single element.
                  setAddrOfLocalVar(VDInit, SrcElement);
                  EmitAnyExprToMem(Init, DestElement,
                                   Init->getType().getQualifiers(),
                                   /*IsInitializer*/ false);
                  LocalDeclMap.erase(VDInit);
                });
          }
          EmitAutoVarCleanups(Emission);
          IsRegistered =
              PrivateScope.addPrivate(OrigVD, Emission.getAllocatedAddress());
        } else {
          Address OriginalAddr = OriginalLVal.getAddress(*this);
          // Emit private VarDecl with copy init.
          // Remap temp VDInit variable to the address of the original
          // variable (for proper handling of captured global variables).
          setAddrOfLocalVar(VDInit, OriginalAddr);
          EmitDecl(*VD);
          LocalDeclMap.erase(VDInit);
          Address VDAddr = GetAddrOfLocalVar(VD);
          if (ThisFirstprivateIsLastprivate &&
              Lastprivates[OrigVD->getCanonicalDecl()] ==
                  OMPC_LASTPRIVATE_conditional) {
            // Create/init special variable for lastprivate conditionals.
            llvm::Value *V =
                EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl),
                                 (*IRef)->getExprLoc());
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(),
                                                AlignmentSource::Decl));
            LocalDeclMap.erase(VD);
            setAddrOfLocalVar(VD, VDAddr);
          }
          IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
        }
        assert(IsRegistered &&
               "firstprivate var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
      ++InitsRef;
    }
  }
  return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty();
}

void CodeGenFunction::EmitOMPPrivateClause(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return;
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
        EmitDecl(*VD);
        // Emit private VarDecl with copy init.
        bool IsRegistered =
            PrivateScope.addPrivate(OrigVD, GetAddrOfLocalVar(VD));
        assert(IsRegistered && "private var already registered as private");
        // Silence the warning about unused variable.
        (void)IsRegistered;
      }
      ++IRef;
    }
  }
}

bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) {
  if (!HaveInsertPoint())
    return false;
  // threadprivate_var1 = master_threadprivate_var1;
  // operator=(threadprivate_var2, master_threadprivate_var2);
  // ...
  // __kmpc_barrier(&loc, global_tid);
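  //
  // Illustrative source-level example:
  //   static int TPV;
  //   #pragma omp threadprivate(TPV)
  //   #pragma omp parallel copyin(TPV)
  // Every thread whose copy of 'TPV' lives at a different address than the
  // master's copy performs the assignment; the barrier then synchronizes
  // all threads before the region body runs.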
  llvm::DenseSet<const VarDecl *> CopiedVars;
  llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr;
  for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = VD->getType();
      if (CopiedVars.insert(VD->getCanonicalDecl()).second) {
        // Get the address of the master variable. If we are emitting code
        // with TLS support, the address is passed from the master as a field
        // in the captured declaration.
        Address MasterAddr = Address::invalid();
        if (getLangOpts().OpenMPUseTLS &&
            getContext().getTargetInfo().isTLSSupported()) {
          assert(CapturedStmtInfo->lookup(VD) &&
                 "Copyin threadprivates should have been captured!");
          DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true,
                          (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
          MasterAddr = EmitLValue(&DRE).getAddress(*this);
          LocalDeclMap.erase(VD);
        } else {
          MasterAddr =
              Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(VD)
                                          : CGM.GetAddrOfGlobal(VD),
                      CGM.getTypes().ConvertTypeForMem(VD->getType()),
                      getContext().getDeclAlign(VD));
        }
        // Get the address of the threadprivate variable.
        Address PrivateAddr = EmitLValue(*IRef).getAddress(*this);
        if (CopiedVars.size() == 1) {
          // First check whether the current thread is the master thread. If
          // it is, there is no need to copy the data.
          CopyBegin = createBasicBlock("copyin.not.master");
          CopyEnd = createBasicBlock("copyin.not.master.end");
          // TODO: Avoid ptrtoint conversion.
          auto *MasterAddrInt = Builder.CreatePtrToInt(
              MasterAddr.emitRawPointer(*this), CGM.IntPtrTy);
          auto *PrivateAddrInt = Builder.CreatePtrToInt(
              PrivateAddr.emitRawPointer(*this), CGM.IntPtrTy);
          Builder.CreateCondBr(
              Builder.CreateICmpNE(MasterAddrInt, PrivateAddrInt), CopyBegin,
              CopyEnd);
          EmitBlock(CopyBegin);
        }
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        EmitOMPCopy(Type, PrivateAddr, MasterAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
  }
  if (CopyEnd) {
    // Exit out of the copying procedure for the non-master thread.
    EmitBlock(CopyEnd, /*IsFinished=*/true);
    return true;
  }
  return false;
}

bool CodeGenFunction::EmitOMPLastprivateClauseInit(
    const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) {
  if (!HaveInsertPoint())
    return false;
  bool HasAtLeastOneLastprivate = false;
  llvm::DenseSet<const VarDecl *> SIMDLCVs;
  if (isOpenMPSimdDirective(D.getDirectiveKind())) {
    const auto *LoopDirective = cast<OMPLoopDirective>(&D);
    for (const Expr *C : LoopDirective->counters()) {
      SIMDLCVs.insert(
          cast<VarDecl>(cast<DeclRefExpr>(C)->getDecl())->getCanonicalDecl());
    }
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    HasAtLeastOneLastprivate = true;
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        !getLangOpts().OpenMPSimd)
      break;
    const auto *IRef = C->varlist_begin();
    const auto *IDestRef = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      // Keep the address of the original variable for the future update at
      // the end of the loop.
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      // Taskloops do not require additional initialization; it is done in
      // the runtime support library.
      if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) {
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc());
        PrivateScope.addPrivate(DestVD, EmitLValue(&DRE).getAddress(*this));
        // Check if the variable is also a firstprivate: in this case IInit
        // is not generated. Initialization of this variable will happen in
        // codegen for the 'firstprivate' clause.
        if (IInit && !SIMDLCVs.count(OrigVD->getCanonicalDecl())) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(IInit)->getDecl());
          Address VDAddr = Address::invalid();
          if (C->getKind() == OMPC_LASTPRIVATE_conditional) {
            VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit(
                *this, OrigVD);
            setAddrOfLocalVar(VD, VDAddr);
          } else {
            // Emit private VarDecl with copy init.
            EmitDecl(*VD);
            VDAddr = GetAddrOfLocalVar(VD);
          }
          bool IsRegistered = PrivateScope.addPrivate(OrigVD, VDAddr);
          assert(IsRegistered &&
                 "lastprivate var already registered as private");
          (void)IsRegistered;
        }
      }
      ++IRef;
      ++IDestRef;
    }
  }
  return HasAtLeastOneLastprivate;
}

void CodeGenFunction::EmitOMPLastprivateClauseFinal(
    const OMPExecutableDirective &D, bool NoFinals,
    llvm::Value *IsLastIterCond) {
  if (!HaveInsertPoint())
    return;
  // Emit the following code:
  // if (<IsLastIterCond>) {
  //   orig_var1 = private_orig_var1;
  //   ...
  //   orig_varn = private_orig_varn;
  // }
  llvm::BasicBlock *ThenBB = nullptr;
  llvm::BasicBlock *DoneBB = nullptr;
  if (IsLastIterCond) {
    // Emit an implicit barrier if at least one lastprivate conditional is
    // found and this is not simd mode.
    if (!getLangOpts().OpenMPSimd &&
        llvm::any_of(D.getClausesOfKind<OMPLastprivateClause>(),
                     [](const OMPLastprivateClause *C) {
                       return C->getKind() == OMPC_LASTPRIVATE_conditional;
                     })) {
      CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(),
                                             OMPD_unknown,
                                             /*EmitChecks=*/false,
                                             /*ForceSimpleCall=*/true);
    }
    ThenBB = createBasicBlock(".omp.lastprivate.then");
    DoneBB = createBasicBlock(".omp.lastprivate.done");
    Builder.CreateCondBr(IsLastIterCond, ThenBB, DoneBB);
    EmitBlock(ThenBB);
  }
  llvm::DenseSet<const VarDecl *> AlreadyEmittedVars;
  llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates;
  if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(&D)) {
    auto IC = LoopDirective->counters().begin();
    for (const Expr *F : LoopDirective->finals()) {
      const auto *D =
          cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl())->getCanonicalDecl();
      if (NoFinals)
        AlreadyEmittedVars.insert(D);
      else
        LoopCountersAndUpdates[D] = F;
      ++IC;
    }
  }
  for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ISrcRef = C->source_exprs().begin();
    auto IDestRef = C->destination_exprs().begin();
    for (const Expr *AssignOp : C->assignment_ops()) {
      const auto *PrivateVD =
          cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      QualType Type = PrivateVD->getType();
      const auto *CanonicalVD = PrivateVD->getCanonicalDecl();
      if (AlreadyEmittedVars.insert(CanonicalVD).second) {
        // If the lastprivate variable is a loop control variable for a
        // loop-based directive, update its value before copying it back to
        // the original variable.
        if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(CanonicalVD))
          EmitIgnoredExpr(FinalExpr);
        const auto *SrcVD =
            cast<VarDecl>(cast<DeclRefExpr>(*ISrcRef)->getDecl());
        const auto *DestVD =
            cast<VarDecl>(cast<DeclRefExpr>(*IDestRef)->getDecl());
        // Get the address of the private variable.
        Address PrivateAddr = GetAddrOfLocalVar(PrivateVD);
        if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>())
          PrivateAddr = Address(
              Builder.CreateLoad(PrivateAddr),
              CGM.getTypes().ConvertTypeForMem(RefTy->getPointeeType()),
              CGM.getNaturalTypeAlignment(RefTy->getPointeeType()));
        // Store the last value to the private copy in the last iteration.
        if (C->getKind() == OMPC_LASTPRIVATE_conditional)
          CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate(
              *this, MakeAddrLValue(PrivateAddr, (*IRef)->getType()), PrivateVD,
              (*IRef)->getExprLoc());
        // Get the address of the original variable.
        Address OriginalAddr = GetAddrOfLocalVar(DestVD);
        EmitOMPCopy(Type, OriginalAddr, PrivateAddr, DestVD, SrcVD, AssignOp);
      }
      ++IRef;
      ++ISrcRef;
      ++IDestRef;
    }
    if (const Expr *PostUpdate = C->getPostUpdateExpr())
      EmitIgnoredExpr(PostUpdate);
  }
  if (IsLastIterCond)
    EmitBlock(DoneBB, /*IsFinished=*/true);
}
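// Illustrative example (a sketch of what the clause carries, not the exact
// names Sema uses): for
//   #pragma omp parallel reduction(+ : Sum)
// the clause provides a private copy of 'Sum', LHS/RHS placeholder
// variables, and a combiner expression of the form 'LHS = LHS + RHS'. The
// initialization below materializes the private copy with the operation's
// identity value (0 for '+') before the region runs.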
void CodeGenFunction::EmitOMPReductionClauseInit(
    const OMPExecutableDirective &D,
    CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) {
  if (!HaveInsertPoint())
    return;
  SmallVector<const Expr *, 4> Shareds;
  SmallVector<const Expr *, 4> Privates;
  SmallVector<const Expr *, 4> ReductionOps;
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  OMPTaskDataTy Data;
  SmallVector<const Expr *, 4> TaskLHSs;
  SmallVector<const Expr *, 4> TaskRHSs;
  for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
    if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan))
      continue;
    Shareds.append(C->varlist_begin(), C->varlist_end());
    Privates.append(C->privates().begin(), C->privates().end());
    ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    if (C->getModifier() == OMPC_REDUCTION_task) {
      Data.ReductionVars.append(C->privates().begin(), C->privates().end());
      Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
      Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
      Data.ReductionOps.append(C->reduction_ops().begin(),
                               C->reduction_ops().end());
      TaskLHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
      TaskRHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
    }
  }
  ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
  unsigned Count = 0;
  auto *ILHS = LHSs.begin();
  auto *IRHS = RHSs.begin();
  auto *IPriv = Privates.begin();
  for (const Expr *IRef : Shareds) {
    const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*IPriv)->getDecl());
    // Emit private VarDecl with reduction init.
    RedCG.emitSharedOrigLValue(*this, Count);
    RedCG.emitAggregateType(*this, Count);
    AutoVarEmission Emission = EmitAutoVarAlloca(*PrivateVD);
    RedCG.emitInitialization(*this, Count, Emission.getAllocatedAddress(),
                             RedCG.getSharedLValue(Count).getAddress(*this),
                             [&Emission](CodeGenFunction &CGF) {
                               CGF.EmitAutoVarInit(Emission);
                               return true;
                             });
    EmitAutoVarCleanups(Emission);
    Address BaseAddr = RedCG.adjustPrivateAddress(
        *this, Count, Emission.getAllocatedAddress());
    bool IsRegistered =
        PrivateScope.addPrivate(RedCG.getBaseDecl(Count), BaseAddr);
    assert(IsRegistered && "private var already registered as private");
    // Silence the warning about unused variable.
    (void)IsRegistered;

    const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
    const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
    QualType Type = PrivateVD->getType();
    bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(IRef);
    if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD, GetAddrOfLocalVar(PrivateVD));
    } else if ((isaOMPArraySectionExpr && Type->isScalarType()) ||
               isa<ArraySubscriptExpr>(IRef)) {
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      PrivateScope.addPrivate(LHSVD,
                              RedCG.getSharedLValue(Count).getAddress(*this));
      PrivateScope.addPrivate(RHSVD,
                              GetAddrOfLocalVar(PrivateVD).withElementType(
                                  ConvertTypeForMem(RHSVD->getType())));
    } else {
      QualType Type = PrivateVD->getType();
      bool IsArray = getContext().getAsArrayType(Type) != nullptr;
      Address OriginalAddr = RedCG.getSharedLValue(Count).getAddress(*this);
      // Store the address of the original variable associated with the LHS
      // implicit variable.
      if (IsArray) {
        OriginalAddr =
            OriginalAddr.withElementType(ConvertTypeForMem(LHSVD->getType()));
      }
      PrivateScope.addPrivate(LHSVD, OriginalAddr);
      PrivateScope.addPrivate(
          RHSVD, IsArray ? GetAddrOfLocalVar(PrivateVD).withElementType(
                               ConvertTypeForMem(RHSVD->getType()))
                         : GetAddrOfLocalVar(PrivateVD));
    }
    ++ILHS;
    ++IRHS;
    ++IPriv;
    ++Count;
  }
1296 if (!Data.ReductionVars.empty()) {
1297 Data.IsReductionWithTaskMod = true;
1298 Data.IsWorksharingReduction =
1299 isOpenMPWorksharingDirective(D.getDirectiveKind());
1300 llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit(
1301 CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data);
1302 const Expr *TaskRedRef = nullptr;
1303 switch (D.getDirectiveKind()) {
1304 case OMPD_parallel:
1305 TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr();
1306 break;
1307 case OMPD_for:
1308 TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr();
1309 break;
1310 case OMPD_sections:
1311 TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr();
1312 break;
1313 case OMPD_parallel_for:
1314 TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr();
1315 break;
1316 case OMPD_parallel_master:
1317 TaskRedRef =
1318 cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr();
1319 break;
1320 case OMPD_parallel_sections:
1321 TaskRedRef =
1322 cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr();
1323 break;
1324 case OMPD_target_parallel:
1325 TaskRedRef =
1326 cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr();
1327 break;
1328 case OMPD_target_parallel_for:
1329 TaskRedRef =
1330 cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr();
1331 break;
1332 case OMPD_distribute_parallel_for:
1333 TaskRedRef =
1334 cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr();
1335 break;
1336 case OMPD_teams_distribute_parallel_for:
1337 TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D)
1338 .getTaskReductionRefExpr();
1339 break;
1340 case OMPD_target_teams_distribute_parallel_for:
1341 TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D)
1342 .getTaskReductionRefExpr();
1343 break;
1344 case OMPD_simd:
1345 case OMPD_for_simd:
1346 case OMPD_section:
1347 case OMPD_single:
1348 case OMPD_master:
1349 case OMPD_critical:
1350 case OMPD_parallel_for_simd:
1351 case OMPD_task:
1352 case OMPD_taskyield:
1353 case OMPD_error:
1354 case OMPD_barrier:
1355 case OMPD_taskwait:
1356 case OMPD_taskgroup:
1357 case OMPD_flush:
1358 case OMPD_depobj:
1359 case OMPD_scan:
1360 case OMPD_ordered:
1361 case OMPD_atomic:
1362 case OMPD_teams:
1363 case OMPD_target:
1364 case OMPD_cancellation_point:
1365 case OMPD_cancel:
1366 case OMPD_target_data:
1367 case OMPD_target_enter_data:
1368 case OMPD_target_exit_data:
1369 case OMPD_taskloop:
1370 case OMPD_taskloop_simd:
1371 case OMPD_master_taskloop:
1372 case OMPD_master_taskloop_simd:
1373 case OMPD_parallel_master_taskloop:
1374 case OMPD_parallel_master_taskloop_simd:
1375 case OMPD_distribute:
1376 case OMPD_target_update:
1377 case OMPD_distribute_parallel_for_simd:
1378 case OMPD_distribute_simd:
1379 case OMPD_target_parallel_for_simd:
1380 case OMPD_target_simd:
1381 case OMPD_teams_distribute:
1382 case OMPD_teams_distribute_simd:
1383 case OMPD_teams_distribute_parallel_for_simd:
1384 case OMPD_target_teams:
1385 case OMPD_target_teams_distribute:
1386 case OMPD_target_teams_distribute_parallel_for_simd:
1387 case OMPD_target_teams_distribute_simd:
1388 case OMPD_declare_target:
1389 case OMPD_end_declare_target:
1390 case OMPD_threadprivate:
1391 case OMPD_allocate:
1392 case OMPD_declare_reduction:
1393 case OMPD_declare_mapper:
1394 case OMPD_declare_simd:
1395 case OMPD_requires:
1396 case OMPD_declare_variant:
1397 case OMPD_begin_declare_variant:
1398 case OMPD_end_declare_variant:
1399 case OMPD_unknown:
1400 default:
1401      llvm_unreachable("Unexpected directive with task reductions.");
1402 }
1403
1404 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl());
1405 EmitVarDecl(D: *VD);
1406 EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD),
1407 /*Volatile=*/false, Ty: TaskRedRef->getType());
1408 }
1409}
1410
1411void CodeGenFunction::EmitOMPReductionClauseFinal(
1412 const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) {
1413 if (!HaveInsertPoint())
1414 return;
1415 llvm::SmallVector<const Expr *, 8> Privates;
1416 llvm::SmallVector<const Expr *, 8> LHSExprs;
1417 llvm::SmallVector<const Expr *, 8> RHSExprs;
1418 llvm::SmallVector<const Expr *, 8> ReductionOps;
1419 bool HasAtLeastOneReduction = false;
1420 bool IsReductionWithTaskMod = false;
1421 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1422 // Do not emit for inscan reductions.
1423 if (C->getModifier() == OMPC_REDUCTION_inscan)
1424 continue;
1425 HasAtLeastOneReduction = true;
1426 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
1427 LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
1428 RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
1429 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
1430 IsReductionWithTaskMod =
1431 IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task;
1432 }
1433 if (HasAtLeastOneReduction) {
1434 if (IsReductionWithTaskMod) {
1435 CGM.getOpenMPRuntime().emitTaskReductionFini(
1436 *this, D.getBeginLoc(),
1437 isOpenMPWorksharingDirective(D.getDirectiveKind()));
1438 }
1439 bool TeamsLoopCanBeParallel = false;
1440 if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D))
1441 TeamsLoopCanBeParallel = TTLD->canBeParallelFor();
1442 bool WithNowait = D.getSingleClause<OMPNowaitClause>() ||
1443 isOpenMPParallelDirective(D.getDirectiveKind()) ||
1444 TeamsLoopCanBeParallel || ReductionKind == OMPD_simd;
1445 bool SimpleReduction = ReductionKind == OMPD_simd;
1446    // Emit nowait reduction if the nowait clause is present or the directive
1447    // is a parallel directive (it always has an implicit barrier).
1448 CGM.getOpenMPRuntime().emitReduction(
1449 CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps,
1450 Options: {WithNowait, SimpleReduction, ReductionKind});
1451 }
1452}
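// Illustrative example (not tied to a specific test): for a directive such as
//   #pragma omp parallel for reduction(+ : sum)
// the routine above collects the clause's privates and combiner expressions
// and emits the runtime reduction call that merges each thread's private
// 'sum' back into the original variable at the end of the region.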
1453
1454static void emitPostUpdateForReductionClause(
1455 CodeGenFunction &CGF, const OMPExecutableDirective &D,
1456 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
1457 if (!CGF.HaveInsertPoint())
1458 return;
1459 llvm::BasicBlock *DoneBB = nullptr;
1460 for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) {
1461 if (const Expr *PostUpdate = C->getPostUpdateExpr()) {
1462 if (!DoneBB) {
1463 if (llvm::Value *Cond = CondGen(CGF)) {
1464          // When the first post-update expression is found, emit the
1465          // conditional block if one was requested.
1466 llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu");
1467 DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done");
1468 CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
1469 CGF.EmitBlock(BB: ThenBB);
1470 }
1471 }
1472 CGF.EmitIgnoredExpr(E: PostUpdate);
1473 }
1474 }
1475 if (DoneBB)
1476 CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true);
1477}
1478
1479namespace {
1480/// Codegen lambda for appending distribute lower and upper bounds to outlined
1481/// parallel function. This is necessary for combined constructs such as
1482/// 'distribute parallel for'
1483typedef llvm::function_ref<void(CodeGenFunction &,
1484 const OMPExecutableDirective &,
1485 llvm::SmallVectorImpl<llvm::Value *> &)>
1486 CodeGenBoundParametersTy;
1487} // anonymous namespace
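// For a combined construct such as
//   #pragma omp distribute parallel for
// the 'distribute' part computes a [LB, UB] chunk per team, and the bound
// parameters appended by this callback hand that chunk to the outlined
// parallel 'for' region so its worksharing starts from the team's bounds
// (illustrative description of the mechanism).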
1488
1489static void
1490checkForLastprivateConditionalUpdate(CodeGenFunction &CGF,
1491 const OMPExecutableDirective &S) {
1492 if (CGF.getLangOpts().OpenMP < 50)
1493 return;
1494 llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls;
1495 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
1496 for (const Expr *Ref : C->varlists()) {
1497 if (!Ref->getType()->isScalarType())
1498 continue;
1499 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1500 if (!DRE)
1501 continue;
1502 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1503 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1504 }
1505 }
1506 for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
1507 for (const Expr *Ref : C->varlists()) {
1508 if (!Ref->getType()->isScalarType())
1509 continue;
1510 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1511 if (!DRE)
1512 continue;
1513 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1514 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1515 }
1516 }
1517 for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
1518 for (const Expr *Ref : C->varlists()) {
1519 if (!Ref->getType()->isScalarType())
1520 continue;
1521 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1522 if (!DRE)
1523 continue;
1524 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1525 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref);
1526 }
1527 }
1528  // Privates should not be analyzed since they are not captured at all.
1529  // Task reductions may be skipped - tasks are ignored.
1530  // Firstprivates do not return a value but may be passed by reference - no
1531  // need to check for updated lastprivate conditional.
1532 for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
1533 for (const Expr *Ref : C->varlists()) {
1534 if (!Ref->getType()->isScalarType())
1535 continue;
1536 const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
1537 if (!DRE)
1538 continue;
1539 PrivateDecls.insert(cast<VarDecl>(DRE->getDecl()));
1540 }
1541 }
1542 CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional(
1543 CGF, D: S, IgnoredDecls: PrivateDecls);
1544}
1545
1546static void emitCommonOMPParallelDirective(
1547 CodeGenFunction &CGF, const OMPExecutableDirective &S,
1548 OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
1549 const CodeGenBoundParametersTy &CodeGenBoundParameters) {
1550 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1551 llvm::Value *NumThreads = nullptr;
1552 llvm::Function *OutlinedFn =
1553 CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
1554 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
1555 CodeGen);
1556 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) {
1557 CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
1558 NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1559 /*IgnoreResultAssign=*/true);
1560 CGF.CGM.getOpenMPRuntime().emitNumThreadsClause(
1561 CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc());
1562 }
1563 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) {
1564 CodeGenFunction::RunCleanupsScope ProcBindScope(CGF);
1565 CGF.CGM.getOpenMPRuntime().emitProcBindClause(
1566 CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc());
1567 }
1568 const Expr *IfCond = nullptr;
1569 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
1570 if (C->getNameModifier() == OMPD_unknown ||
1571 C->getNameModifier() == OMPD_parallel) {
1572 IfCond = C->getCondition();
1573 break;
1574 }
1575 }
1576
1577 OMPParallelScope Scope(CGF, S);
1578 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
1579  // Combining 'distribute' with 'for' requires sharing each 'distribute'
1580  // chunk's lower and upper bounds with the pragma 'for' chunking mechanism.
1581  // The following lambda takes care of appending the lower and upper bound
1582  // parameters when necessary.
1583 CodeGenBoundParameters(CGF, S, CapturedVars);
1584 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
1585 CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn,
1586 CapturedVars, IfCond, NumThreads);
1587}
1588
1589static bool isAllocatableDecl(const VarDecl *VD) {
1590 const VarDecl *CVD = VD->getCanonicalDecl();
1591 if (!CVD->hasAttr<OMPAllocateDeclAttr>())
1592 return false;
1593 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1594 // Use the default allocation.
1595 return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc ||
1596 AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) &&
1597 !AA->getAllocator());
1598}
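// As an illustration (hypothetical declarations), given
//   int v;
//   #pragma omp allocate(v) allocator(omp_large_cap_mem_alloc)
// isAllocatableDecl(v) returns true, while a declaration without the
// attribute, or one using the default allocator with no allocator
// expression, returns false.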
1599
1600static void emitEmptyBoundParameters(CodeGenFunction &,
1601 const OMPExecutableDirective &,
1602 llvm::SmallVectorImpl<llvm::Value *> &) {}
1603
1604static void emitOMPCopyinClause(CodeGenFunction &CGF,
1605 const OMPExecutableDirective &S) {
1606 bool Copyins = CGF.EmitOMPCopyinClause(D: S);
1607 if (Copyins) {
1608    // Emit an implicit barrier to synchronize threads and avoid data races
1609    // when propagating the master thread's values of threadprivate variables
1610    // to the local instances of those variables in all other implicit threads.
1611 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
1612 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
1613 /*ForceSimpleCall=*/true);
1614 }
1615}
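// A minimal sketch of the situation handled above: given
//   static int gv;
//   #pragma omp threadprivate(gv)
//   #pragma omp parallel copyin(gv)
// the copy-in propagates the primary thread's 'gv' into each thread's copy,
// and the barrier keeps threads from reading their copies before the
// propagation completes.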
1616
1617Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable(
1618 CodeGenFunction &CGF, const VarDecl *VD) {
1619 CodeGenModule &CGM = CGF.CGM;
1620 auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1621
1622 if (!VD)
1623 return Address::invalid();
1624 const VarDecl *CVD = VD->getCanonicalDecl();
1625 if (!isAllocatableDecl(VD: CVD))
1626 return Address::invalid();
1627 llvm::Value *Size;
1628 CharUnits Align = CGM.getContext().getDeclAlign(CVD);
1629 if (CVD->getType()->isVariablyModifiedType()) {
1630 Size = CGF.getTypeSize(Ty: CVD->getType());
1631 // Align the size: ((size + align - 1) / align) * align
1632 Size = CGF.Builder.CreateNUWAdd(
1633 LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1)));
1634 Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align));
1635 Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align));
1636 } else {
1637 CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
1638 Size = CGM.getSize(numChars: Sz.alignTo(Align));
1639 }
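  // Worked example of the rounding above (assuming Align == 16 bytes and a
  // VLA size of 20 bytes): ((20 + 15) / 16) * 16 == 32, i.e. the size is
  // rounded up to the next multiple of the alignment.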
1640
1641 const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
1642 assert(AA->getAllocator() &&
1643 "Expected allocator expression for non-default allocator.");
1644 llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator());
1645  // According to the standard, the original allocator type is an enum
1646  // (integer). Convert to pointer type, if required.
1647 if (Allocator->getType()->isIntegerTy())
1648 Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy);
1649 else if (Allocator->getType()->isPointerTy())
1650 Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator,
1651 DestTy: CGM.VoidPtrTy);
1652
1653 llvm::Value *Addr = OMPBuilder.createOMPAlloc(
1654 Loc: CGF.Builder, Size, Allocator,
1655 Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr"}, FirstSeparator: ".", Separator: "."));
1656 llvm::CallInst *FreeCI =
1657 OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator);
1658
1659 CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI);
1660 Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
1661 Addr,
1662 CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(CVD->getType())),
1663 getNameWithSeparators(Parts: {CVD->getName(), ".addr"}, FirstSeparator: ".", Separator: "."));
1664 return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align);
1665}
1666
1667Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate(
1668 CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr,
1669 SourceLocation Loc) {
1670 CodeGenModule &CGM = CGF.CGM;
1671 if (CGM.getLangOpts().OpenMPUseTLS &&
1672 CGM.getContext().getTargetInfo().isTLSSupported())
1673 return VDAddr;
1674
1675 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1676
1677 llvm::Type *VarTy = VDAddr.getElementType();
1678 llvm::Value *Data =
1679 CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy);
1680 llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy));
1681 std::string Suffix = getNameWithSeparators(Parts: {"cache", ""});
1682 llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix);
1683
1684 llvm::CallInst *ThreadPrivateCacheCall =
1685 OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName);
1686
1687 return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment());
1688}
1689
1690std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators(
1691 ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) {
1692 SmallString<128> Buffer;
1693 llvm::raw_svector_ostream OS(Buffer);
1694 StringRef Sep = FirstSeparator;
1695 for (StringRef Part : Parts) {
1696 OS << Sep << Part;
1697 Sep = Separator;
1698 }
1699 return OS.str().str();
1700}
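// Usage sketch: getNameWithSeparators({"cache", ""}, ".", ".") yields
// ".cache." - the first separator prefixes the first part, and the regular
// separator precedes each subsequent part.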
1701
1702void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
1703 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1704 InsertPointTy CodeGenIP, Twine RegionName) {
1705 CGBuilderTy &Builder = CGF.Builder;
1706 Builder.restoreIP(IP: CodeGenIP);
1707 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1708 Suffix: "." + RegionName + ".after");
1709
1710 {
1711 OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1712 CGF.EmitStmt(S: RegionBodyStmt);
1713 }
1714
1715 if (Builder.saveIP().isSet())
1716 Builder.CreateBr(Dest: FiniBB);
1717}
1718
1719void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1720 CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP,
1721 InsertPointTy CodeGenIP, Twine RegionName) {
1722 CGBuilderTy &Builder = CGF.Builder;
1723 Builder.restoreIP(IP: CodeGenIP);
1724 llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false,
1725 Suffix: "." + RegionName + ".after");
1726
1727 {
1728 OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB);
1729 CGF.EmitStmt(S: RegionBodyStmt);
1730 }
1731
1732 if (Builder.saveIP().isSet())
1733 Builder.CreateBr(Dest: FiniBB);
1734}
1735
1736void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
1737 if (CGM.getLangOpts().OpenMPIRBuilder) {
1738 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
1739 // Check if we have any if clause associated with the directive.
1740 llvm::Value *IfCond = nullptr;
1741 if (const auto *C = S.getSingleClause<OMPIfClause>())
1742 IfCond = EmitScalarExpr(E: C->getCondition(),
1743 /*IgnoreResultAssign=*/true);
1744
1745 llvm::Value *NumThreads = nullptr;
1746 if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>())
1747 NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(),
1748 /*IgnoreResultAssign=*/true);
1749
1750 ProcBindKind ProcBind = OMP_PROC_BIND_default;
1751 if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>())
1752 ProcBind = ProcBindClause->getProcBindKind();
1753
1754 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
1755
1756    // The cleanup callback that finalizes all variables at the given location,
1757    // and thus calls destructors etc.
1758 auto FiniCB = [this](InsertPointTy IP) {
1759 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
1760 };
1761
1762 // Privatization callback that performs appropriate action for
1763 // shared/private/firstprivate/lastprivate/copyin/... variables.
1764 //
1765 // TODO: This defaults to shared right now.
1766 auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
1767 llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
1768 // The next line is appropriate only for variables (Val) with the
1769 // data-sharing attribute "shared".
1770 ReplVal = &Val;
1771
1772 return CodeGenIP;
1773 };
1774
1775 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
1776 const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt();
1777
1778 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
1779 InsertPointTy CodeGenIP) {
1780 OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody(
1781 CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel");
1782 };
1783
1784 CGCapturedStmtInfo CGSI(*CS, CR_OpenMP);
1785 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
1786 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
1787 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
1788 Builder.restoreIP(
1789 IP: OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB,
1790 IfCond, NumThreads, ProcBind, S.hasCancel()));
1791 return;
1792 }
1793
1794 // Emit parallel region as a standalone region.
1795 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
1796 Action.Enter(CGF);
1797 OMPPrivateScope PrivateScope(CGF);
1798 emitOMPCopyinClause(CGF, S);
1799 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
1800 CGF.EmitOMPPrivateClause(S, PrivateScope);
1801 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
1802 (void)PrivateScope.Privatize();
1803 CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt());
1804 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
1805 };
1806 {
1807 auto LPCRegion =
1808 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
1809 emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
1810 emitEmptyBoundParameters);
1811 emitPostUpdateForReductionClause(*this, S,
1812 [](CodeGenFunction &) { return nullptr; });
1813 }
1814 // Check for outer lastprivate conditional update.
1815 checkForLastprivateConditionalUpdate(*this, S);
1816}
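// Illustrative source for the two paths above (not from a specific test):
//   #pragma omp parallel if(cond) num_threads(4) proc_bind(close)
//   { /* body */ }
// When LangOpts.OpenMPIRBuilder is set, OpenMPIRBuilder::createParallel
// emits the region directly; otherwise the body is outlined and passed to
// emitCommonOMPParallelDirective together with the clause values.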
1817
1818void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) {
1819 EmitStmt(S: S.getIfStmt());
1820}
1821
1822namespace {
1823/// RAII to handle scopes for loop transformation directives.
1824class OMPTransformDirectiveScopeRAII {
1825 OMPLoopScope *Scope = nullptr;
1826 CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr;
1827 CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr;
1828
1829 OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) =
1830 delete;
1831 OMPTransformDirectiveScopeRAII &
1832 operator=(const OMPTransformDirectiveScopeRAII &) = delete;
1833
1834public:
1835 OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) {
1836 if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) {
1837 Scope = new OMPLoopScope(CGF, *Dir);
1838 CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP);
1839 CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI);
1840 }
1841 }
1842 ~OMPTransformDirectiveScopeRAII() {
1843 if (!Scope)
1844 return;
1845 delete CapInfoRAII;
1846 delete CGSI;
1847 delete Scope;
1848 }
1849};
1850} // namespace
1851
1852static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop,
1853 int MaxLevel, int Level = 0) {
1854 assert(Level < MaxLevel && "Too deep lookup during loop body codegen.");
1855 const Stmt *SimplifiedS = S->IgnoreContainers();
1856 if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) {
1857 PrettyStackTraceLoc CrashInfo(
1858 CGF.getContext().getSourceManager(), CS->getLBracLoc(),
1859 "LLVM IR generation of compound statement ('{}')");
1860
1861 // Keep track of the current cleanup stack depth, including debug scopes.
1862 CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange());
1863 for (const Stmt *CurStmt : CS->body())
1864 emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level);
1865 return;
1866 }
1867 if (SimplifiedS == NextLoop) {
1868 if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS))
1869 SimplifiedS = Dir->getTransformedStmt();
1870 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS))
1871 SimplifiedS = CanonLoop->getLoopStmt();
1872 if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) {
1873 S = For->getBody();
1874 } else {
1875 assert(isa<CXXForRangeStmt>(SimplifiedS) &&
1876 "Expected canonical for loop or range-based for loop.");
1877 const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS);
1878 CGF.EmitStmt(S: CXXFor->getLoopVarStmt());
1879 S = CXXFor->getBody();
1880 }
1881 if (Level + 1 < MaxLevel) {
1882 NextLoop = OMPLoopDirective::tryToFindNextInnerLoop(
1883 CurStmt: S, /*TryImperfectlyNestedLoops=*/true);
1884 emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1);
1885 return;
1886 }
1887 }
1888 CGF.EmitStmt(S);
1889}
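// Sketch of what emitBody handles (hypothetical S0/S1 statements):
//   #pragma omp for collapse(2)
//   for (int i = 0; i < N; ++i) {
//     S0;                          // imperfectly nested statement
//     for (int j = 0; j < M; ++j)
//       S1;
//   }
// Statements such as S0 that are not the next loop are emitted in place,
// while the recursion descends into the 'j' loop and finally emits the
// innermost body S1.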
1890
1891void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D,
1892 JumpDest LoopExit) {
1893 RunCleanupsScope BodyScope(*this);
1894  // Update the counter values for the current iteration.
1895 for (const Expr *UE : D.updates())
1896 EmitIgnoredExpr(E: UE);
1897 // Update the linear variables.
1898  // In distribute directives only loop counters may be marked as linear, so
1899  // there is no need to generate the code for them.
1900 if (!isOpenMPDistributeDirective(D.getDirectiveKind())) {
1901 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
1902 for (const Expr *UE : C->updates())
1903 EmitIgnoredExpr(UE);
1904 }
1905 }
1906
1907 // On a continue in the body, jump to the end.
1908 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue");
1909 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
1910 for (const Expr *E : D.finals_conditions()) {
1911 if (!E)
1912 continue;
1913    // Check that the loop counter in a non-rectangular nest fits into the
1914    // iteration space.
1915 llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next");
1916 EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(),
1917 TrueCount: getProfileCount(S: D.getBody()));
1918 EmitBlock(BB: NextBB);
1919 }
1920
1921 OMPPrivateScope InscanScope(*this);
1922 EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true);
1923 bool IsInscanRegion = InscanScope.Privatize();
1924 if (IsInscanRegion) {
1925 // Need to remember the block before and after scan directive
1926 // to dispatch them correctly depending on the clause used in
1927 // this directive, inclusive or exclusive. For inclusive scan the natural
1928 // order of the blocks is used, for exclusive clause the blocks must be
1929 // executed in reverse order.
1930 OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb");
1931 OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb");
1932    // No need to allocate the inscan exit block; in simd mode it is selected
1933    // in the codegen for the scan directive.
1934 if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd)
1935 OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb");
1936 OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch");
1937 EmitBranch(Block: OMPScanDispatch);
1938 EmitBlock(BB: OMPBeforeScanBlock);
1939 }
1940
1941 // Emit loop variables for C++ range loops.
1942 const Stmt *Body =
1943 D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers();
1944 // Emit loop body.
1945 emitBody(*this, Body,
1946 OMPLoopBasedDirective::tryToFindNextInnerLoop(
1947 CurStmt: Body, /*TryImperfectlyNestedLoops=*/true),
1948 D.getLoopsNumber());
1949
1950 // Jump to the dispatcher at the end of the loop body.
1951 if (IsInscanRegion)
1952 EmitBranch(Block: OMPScanExitBlock);
1953
1954 // The end (updates/cleanups).
1955 EmitBlock(BB: Continue.getBlock());
1956 BreakContinueStack.pop_back();
1957}
1958
1959using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>;
1960
1961/// Emit a captured statement and return the function as well as its captured
1962/// closure context.
1963static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF,
1964 const CapturedStmt *S) {
1965 LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S);
1966 CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true);
1967 std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI =
1968 std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S);
1969 CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get());
1970 llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S);
1971
1972 return {F, CapStruct.getPointer(CGF&: ParentCGF)};
1973}
1974
1975/// Emit a call to a previously captured closure.
1976static llvm::CallInst *
1977emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap,
1978 llvm::ArrayRef<llvm::Value *> Args) {
1979  // Append the closure context to the argument list.
1980 SmallVector<llvm::Value *> EffectiveArgs;
1981 EffectiveArgs.reserve(N: Args.size() + 1);
1982 llvm::append_range(C&: EffectiveArgs, R&: Args);
1983 EffectiveArgs.push_back(Elt: Cap.second);
1984
1985 return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs);
1986}
1987
1988llvm::CanonicalLoopInfo *
1989CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) {
1990 assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented");
1991
1992  // The caller is processing the loop-associated directive containing the \p
1993  // Depth loops nested in \p S. Put the previous pending loop-associated
1994  // directive on the stack. If the current loop-associated directive is a loop
1995  // transformation directive, it will push its generated loops onto the stack
1996  // such that together with the loops left here they form the combined loop
1997  // nest for the parent loop-associated directive.
1998 int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth;
1999 ExpectedOMPLoopDepth = Depth;
2000
2001 EmitStmt(S);
2002 assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops");
2003
2004 // The last added loop is the outermost one.
2005 llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back();
2006
2007 // Pop the \p Depth loops requested by the call from that stack and restore
2008 // the previous context.
2009 OMPLoopNestStack.pop_back_n(NumItems: Depth);
2010 ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth;
2011
2012 return Result;
2013}
2014
2015void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) {
2016 const Stmt *SyntacticalLoop = S->getLoopStmt();
2017 if (!getLangOpts().OpenMPIRBuilder) {
2018    // The OpenMPIRBuilder is not enabled; emit the loop as a plain statement.
2019 EmitStmt(S: SyntacticalLoop);
2020 return;
2021 }
2022
2023 LexicalScope ForScope(*this, S->getSourceRange());
2024
2025 // Emit init statements. The Distance/LoopVar funcs may reference variable
2026 // declarations they contain.
2027 const Stmt *BodyStmt;
2028 if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) {
2029 if (const Stmt *InitStmt = For->getInit())
2030 EmitStmt(S: InitStmt);
2031 BodyStmt = For->getBody();
2032 } else if (const auto *RangeFor =
2033 dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) {
2034 if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt())
2035 EmitStmt(S: RangeStmt);
2036 if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt())
2037 EmitStmt(S: BeginStmt);
2038 if (const DeclStmt *EndStmt = RangeFor->getEndStmt())
2039 EmitStmt(S: EndStmt);
2040 if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt())
2041 EmitStmt(S: LoopVarStmt);
2042 BodyStmt = RangeFor->getBody();
2043 } else
2044 llvm_unreachable("Expected for-stmt or range-based for-stmt");
2045
2046 // Emit closure for later use. By-value captures will be captured here.
2047 const CapturedStmt *DistanceFunc = S->getDistanceFunc();
2048 EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc);
2049 const CapturedStmt *LoopVarFunc = S->getLoopVarFunc();
2050 EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc);
2051
2052 // Call the distance function to get the number of iterations of the loop to
2053 // come.
2054 QualType LogicalTy = DistanceFunc->getCapturedDecl()
2055 ->getParam(i: 0)
2056 ->getType()
2057 .getNonReferenceType();
2058 RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr");
2059 emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()});
2060 llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count");
2061
2062 // Emit the loop structure.
2063 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2064 auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP,
2065 llvm::Value *IndVar) {
2066 Builder.restoreIP(IP: CodeGenIP);
2067
2068 // Emit the loop body: Convert the logical iteration number to the loop
2069 // variable and emit the body.
2070 const DeclRefExpr *LoopVarRef = S->getLoopVarRef();
2071 LValue LCVal = EmitLValue(LoopVarRef);
2072 Address LoopVarAddress = LCVal.getAddress(CGF&: *this);
2073 emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure,
2074 Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar});
2075
2076 RunCleanupsScope BodyScope(*this);
2077 EmitStmt(S: BodyStmt);
2078 };
2079 llvm::CanonicalLoopInfo *CL =
2080 OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal);
2081
2082 // Finish up the loop.
2083 Builder.restoreIP(IP: CL->getAfterIP());
2084 ForScope.ForceCleanup();
2085
2086 // Remember the CanonicalLoopInfo for parent AST nodes consuming it.
2087 OMPLoopNestStack.push_back(Elt: CL);
2088}
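// Rough correspondence for the closures above: given
//   for (int i = 0; i < n; i += 2) body(i);
// DistanceFunc stores the trip count ((n + 1) / 2 here) through its output
// parameter, and LoopVarFunc maps a logical iteration number back to the
// user's loop variable (i = 0 + ind * 2) before the body runs.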
2089
2090void CodeGenFunction::EmitOMPInnerLoop(
2091 const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond,
2092 const Expr *IncExpr,
2093 const llvm::function_ref<void(CodeGenFunction &)> BodyGen,
2094 const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) {
2095 auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end");
2096
2097 // Start the loop with a block that tests the condition.
2098 auto CondBlock = createBasicBlock(name: "omp.inner.for.cond");
2099 EmitBlock(BB: CondBlock);
2100 const SourceRange R = S.getSourceRange();
2101
2102 // If attributes are attached, push to the basic block with them.
2103 const auto &OMPED = cast<OMPExecutableDirective>(Val: S);
2104 const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt();
2105 const Stmt *SS = ICS->getCapturedStmt();
2106 const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS);
2107 OMPLoopNestStack.clear();
2108 if (AS)
2109 LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(),
2110 Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2111 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2112 else
2113 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2114 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2115
2116 // If there are any cleanups between here and the loop-exit scope,
2117 // create a block to stage a loop exit along.
2118 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2119 if (RequiresCleanup)
2120 ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup");
2121
2122 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body");
2123
2124 // Emit condition.
2125 EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(&S));
2126 if (ExitBlock != LoopExit.getBlock()) {
2127 EmitBlock(BB: ExitBlock);
2128 EmitBranchThroughCleanup(Dest: LoopExit);
2129 }
2130
2131 EmitBlock(BB: LoopBody);
2132 incrementProfileCounter(&S);
2133
2134 // Create a block for the increment.
2135 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc");
2136 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
2137
2138 BodyGen(*this);
2139
2140 // Emit "IV = IV + 1" and a back-edge to the condition block.
2141 EmitBlock(BB: Continue.getBlock());
2142 EmitIgnoredExpr(E: IncExpr);
2143 PostIncGen(*this);
2144 BreakContinueStack.pop_back();
2145 EmitBranch(Block: CondBlock);
2146 LoopStack.pop();
2147 // Emit the fall-through block.
2148 EmitBlock(BB: LoopExit.getBlock());
2149}
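// Shape of the emitted control flow (block names match the createBasicBlock
// calls above):
//   omp.inner.for.cond --> omp.inner.for.body --> omp.inner.for.inc
//          |  ^                                          |
//          |  +------------------ back-edge -------------+
//          +--> omp.inner.for.end (fall-through exit)
// with an optional omp.inner.for.cond.cleanup block staged between the
// condition and the exit when cleanups are required.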
2150
2151bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) {
2152 if (!HaveInsertPoint())
2153 return false;
2154 // Emit inits for the linear variables.
2155 bool HasLinears = false;
2156 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2157 for (const Expr *Init : C->inits()) {
2158 HasLinears = true;
2159 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl());
2160 if (const auto *Ref =
2161 dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) {
2162 AutoVarEmission Emission = EmitAutoVarAlloca(*VD);
2163 const auto *OrigVD = cast<VarDecl>(Ref->getDecl());
2164 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2165 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2166 VD->getInit()->getType(), VK_LValue,
2167 VD->getInit()->getExprLoc());
2168 EmitExprAsInit(
2169 &DRE, VD,
2170 MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()),
2171 /*capturedByInit=*/false);
2172 EmitAutoVarCleanups(Emission);
2173 } else {
2174 EmitVarDecl(*VD);
2175 }
2176 }
2177 // Emit the linear steps for the linear clauses.
2178 // If a step is not constant, it is pre-calculated before the loop.
2179 if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep()))
2180 if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) {
2181 EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl()));
2182 // Emit calculation of the linear step.
2183 EmitIgnoredExpr(CS);
2184 }
2185 }
2186 return HasLinears;
2187}
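// Illustrative use of the inits handled above:
//   #pragma omp simd linear(x : 2)
// Sema provides an init expression that copies the original 'x' into its
// private counterpart; because the step is a constant here, no separate
// pre-calculated step variable is emitted.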
2188
2189void CodeGenFunction::EmitOMPLinearClauseFinal(
2190 const OMPLoopDirective &D,
2191 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2192 if (!HaveInsertPoint())
2193 return;
2194 llvm::BasicBlock *DoneBB = nullptr;
2195 // Emit the final values of the linear variables.
2196 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2197 auto IC = C->varlist_begin();
2198 for (const Expr *F : C->finals()) {
2199 if (!DoneBB) {
2200 if (llvm::Value *Cond = CondGen(*this)) {
2201          // When the first post-update expression is found, emit the
2202          // conditional block if one was requested.
2203 llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu");
2204 DoneBB = createBasicBlock(".omp.linear.pu.done");
2205 Builder.CreateCondBr(Cond, ThenBB, DoneBB);
2206 EmitBlock(ThenBB);
2207 }
2208 }
2209 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl());
2210 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD),
2211 CapturedStmtInfo->lookup(OrigVD) != nullptr,
2212 (*IC)->getType(), VK_LValue, (*IC)->getExprLoc());
2213 Address OrigAddr = EmitLValue(&DRE).getAddress(*this);
2214 CodeGenFunction::OMPPrivateScope VarScope(*this);
2215 VarScope.addPrivate(OrigVD, OrigAddr);
2216 (void)VarScope.Privatize();
2217 EmitIgnoredExpr(F);
2218 ++IC;
2219 }
2220 if (const Expr *PostUpdate = C->getPostUpdateExpr())
2221 EmitIgnoredExpr(PostUpdate);
2222 }
2223 if (DoneBB)
2224 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2225}
2226
2227static void emitAlignedClause(CodeGenFunction &CGF,
2228 const OMPExecutableDirective &D) {
2229 if (!CGF.HaveInsertPoint())
2230 return;
2231 for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) {
2232 llvm::APInt ClauseAlignment(64, 0);
2233 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2234 auto *AlignmentCI =
2235 cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr));
2236 ClauseAlignment = AlignmentCI->getValue();
2237 }
2238 for (const Expr *E : Clause->varlists()) {
2239 llvm::APInt Alignment(ClauseAlignment);
2240 if (Alignment == 0) {
2241 // OpenMP [2.8.1, Description]
2242 // If no optional parameter is specified, implementation-defined default
2243 // alignments for SIMD instructions on the target platforms are assumed.
2244 Alignment =
2245 CGF.getContext()
2246 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2247 E->getType()->getPointeeType()))
2248 .getQuantity();
2249 }
2250 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2251 "alignment is not power of 2");
2252 if (Alignment != 0) {
2253 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2254 CGF.emitAlignmentAssumption(
2255 PtrValue, E, /*No second loc needed*/ SourceLocation(),
2256 llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment));
2257 }
2258 }
2259 }
2260}
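// Effect of the clause handled above (sketch): for
//   #pragma omp simd aligned(p : 64)
// an alignment assumption equivalent to __builtin_assume_aligned(p, 64) is
// attached to the pointer value so the vectorizer may assume aligned
// accesses.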
2261
2262void CodeGenFunction::EmitOMPPrivateLoopCounters(
2263 const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) {
2264 if (!HaveInsertPoint())
2265 return;
2266 auto I = S.private_counters().begin();
2267 for (const Expr *E : S.counters()) {
2268 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2269 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl());
2270 // Emit var without initialization.
2271 AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD);
2272 EmitAutoVarCleanups(emission: VarEmission);
2273 LocalDeclMap.erase(PrivateVD);
2274 (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress());
2275 if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) ||
2276 VD->hasGlobalStorage()) {
2277 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD),
2278 LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD),
2279 E->getType(), VK_LValue, E->getExprLoc());
2280 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(&DRE).getAddress(CGF&: *this));
2281 } else {
2282 (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress());
2283 }
2284 ++I;
2285 }
2286 // Privatize extra loop counters used in loops for ordered(n) clauses.
2287 for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) {
2288 if (!C->getNumForLoops())
2289 continue;
2290 for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size();
2291 I < E; ++I) {
2292 const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I));
2293 const auto *VD = cast<VarDecl>(DRE->getDecl());
2294 // Override only those variables that can be captured to avoid re-emission
2295 // of the variables declared within the loops.
2296 if (DRE->refersToEnclosingVariableOrCapture()) {
2297 (void)LoopScope.addPrivate(
2298 VD, CreateMemTemp(DRE->getType(), VD->getName()));
2299 }
2300 }
2301 }
2302}
2303
2304static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S,
2305 const Expr *Cond, llvm::BasicBlock *TrueBlock,
2306 llvm::BasicBlock *FalseBlock, uint64_t TrueCount) {
2307 if (!CGF.HaveInsertPoint())
2308 return;
2309 {
2310 CodeGenFunction::OMPPrivateScope PreCondScope(CGF);
2311 CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope);
2312 (void)PreCondScope.Privatize();
2313 // Get initial values of real counters.
2314 for (const Expr *I : S.inits()) {
2315 CGF.EmitIgnoredExpr(E: I);
2316 }
2317 }
2318 // Create temp loop control variables with their init values to support
2319 // non-rectangular loops.
2320 CodeGenFunction::OMPMapVars PreCondVars;
2321 for (const Expr *E : S.dependent_counters()) {
2322 if (!E)
2323 continue;
2324 assert(!E->getType().getNonReferenceType()->isRecordType() &&
2325 "dependent counter must not be an iterator.");
2326 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
2327 Address CounterAddr =
2328 CGF.CreateMemTemp(VD->getType().getNonReferenceType());
2329 (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr);
2330 }
2331 (void)PreCondVars.apply(CGF);
2332 for (const Expr *E : S.dependent_inits()) {
2333 if (!E)
2334 continue;
2335 CGF.EmitIgnoredExpr(E);
2336 }
2337  // Check that the loop is executed at least once.
2338 CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount);
2339 PreCondVars.restore(CGF);
2340}
2341
2342void CodeGenFunction::EmitOMPLinearClause(
2343 const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) {
2344 if (!HaveInsertPoint())
2345 return;
2346 llvm::DenseSet<const VarDecl *> SIMDLCVs;
2347 if (isOpenMPSimdDirective(D.getDirectiveKind())) {
2348 const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D);
2349 for (const Expr *C : LoopDirective->counters()) {
2350 SIMDLCVs.insert(
2351 V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl());
2352 }
2353 }
2354 for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) {
2355 auto CurPrivate = C->privates().begin();
2356 for (const Expr *E : C->varlists()) {
2357 const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
2358 const auto *PrivateVD =
2359 cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl());
2360 if (!SIMDLCVs.count(VD->getCanonicalDecl())) {
2361 // Emit private VarDecl with copy init.
2362 EmitVarDecl(*PrivateVD);
2363 bool IsRegistered =
2364 PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD));
2365 assert(IsRegistered && "linear var already registered as private");
2366 // Silence the warning about unused variable.
2367 (void)IsRegistered;
2368 } else {
2369 EmitVarDecl(*PrivateVD);
2370 }
2371 ++CurPrivate;
2372 }
2373 }
2374}
2375
2376static void emitSimdlenSafelenClause(CodeGenFunction &CGF,
2377 const OMPExecutableDirective &D) {
2378 if (!CGF.HaveInsertPoint())
2379 return;
2380 if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) {
2381 RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2382 /*ignoreResult=*/true);
2383 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2384 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2385    // In the presence of a finite 'safelen', it may be unsafe to mark all
2386    // the memory instructions parallel, because loop-carried
2387    // dependences of 'safelen' iterations are possible.
2388 CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>());
2389 } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) {
2390 RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2391 /*ignoreResult=*/true);
2392 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2393 CGF.LoopStack.setVectorizeWidth(Val->getZExtValue());
2394    // In the presence of a finite 'safelen', it may be unsafe to mark all
2395    // the memory instructions parallel, because loop-carried
2396    // dependences of 'safelen' iterations are possible.
2397 CGF.LoopStack.setParallel(/*Enable=*/false);
2398 }
2399}
2400
2401void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) {
2402  // Walk clauses and process safelen/simdlen, order, and inscan reductions.
2403 LoopStack.setParallel(/*Enable=*/true);
2404 LoopStack.setVectorizeEnable();
2405 emitSimdlenSafelenClause(*this, D);
2406 if (const auto *C = D.getSingleClause<OMPOrderClause>())
2407 if (C->getKind() == OMPC_ORDER_concurrent)
2408 LoopStack.setParallel(/*Enable=*/true);
2409 if ((D.getDirectiveKind() == OMPD_simd ||
2410 (getLangOpts().OpenMPSimd &&
2411 isOpenMPSimdDirective(D.getDirectiveKind()))) &&
2412 llvm::any_of(D.getClausesOfKind<OMPReductionClause>(),
2413 [](const OMPReductionClause *C) {
2414 return C->getModifier() == OMPC_REDUCTION_inscan;
2415 }))
2416 // Disable parallel access in case of prefix sum.
2417 LoopStack.setParallel(/*Enable=*/false);
2418}
2419
2420void CodeGenFunction::EmitOMPSimdFinal(
2421 const OMPLoopDirective &D,
2422 const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) {
2423 if (!HaveInsertPoint())
2424 return;
2425 llvm::BasicBlock *DoneBB = nullptr;
2426 auto IC = D.counters().begin();
2427 auto IPC = D.private_counters().begin();
2428 for (const Expr *F : D.finals()) {
2429 const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl());
2430 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl());
2431 const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD);
2432 if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) ||
2433 OrigVD->hasGlobalStorage() || CED) {
2434 if (!DoneBB) {
2435 if (llvm::Value *Cond = CondGen(*this)) {
2436          // When the first post-update expression is found, emit the
2437          // conditional block if one was requested.
2438 llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then");
2439 DoneBB = createBasicBlock(name: ".omp.final.done");
2440 Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB);
2441 EmitBlock(BB: ThenBB);
2442 }
2443 }
2444 Address OrigAddr = Address::invalid();
2445 if (CED) {
2446 OrigAddr =
2447 EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress(*this);
2448 } else {
2449 DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD),
2450 /*RefersToEnclosingVariableOrCapture=*/false,
2451 (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc());
2452 OrigAddr = EmitLValue(&DRE).getAddress(CGF&: *this);
2453 }
2454 OMPPrivateScope VarScope(*this);
2455 VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr);
2456 (void)VarScope.Privatize();
2457 EmitIgnoredExpr(E: F);
2458 }
2459 ++IC;
2460 ++IPC;
2461 }
2462 if (DoneBB)
2463 EmitBlock(BB: DoneBB, /*IsFinished=*/true);
2464}
2465
2466static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
2467 const OMPLoopDirective &S,
2468 CodeGenFunction::JumpDest LoopExit) {
2469 CGF.EmitOMPLoopBody(D: S, LoopExit);
2470 CGF.EmitStopPoint(&S);
2471}
2472
2473/// Emit a helper variable and return corresponding lvalue.
2474static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
2475 const DeclRefExpr *Helper) {
2476 auto VDecl = cast<VarDecl>(Val: Helper->getDecl());
2477 CGF.EmitVarDecl(D: *VDecl);
2478 return CGF.EmitLValue(Helper);
2479}
2480
2481static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S,
2482 const RegionCodeGenTy &SimdInitGen,
2483 const RegionCodeGenTy &BodyCodeGen) {
2484 auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF,
2485 PrePostActionTy &) {
2486 CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S);
2487 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2488 SimdInitGen(CGF);
2489
2490 BodyCodeGen(CGF);
2491 };
2492 auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
2493 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
2494 CGF.LoopStack.setVectorizeEnable(/*Enable=*/false);
2495
2496 BodyCodeGen(CGF);
2497 };
2498 const Expr *IfCond = nullptr;
2499 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
2500 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
2501 if (CGF.getLangOpts().OpenMP >= 50 &&
2502 (C->getNameModifier() == OMPD_unknown ||
2503 C->getNameModifier() == OMPD_simd)) {
2504 IfCond = C->getCondition();
2505 break;
2506 }
2507 }
2508 }
2509 if (IfCond) {
2510 CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen);
2511 } else {
2512 RegionCodeGenTy ThenRCG(ThenGen);
2513 ThenRCG(CGF);
2514 }
2515}
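// Versioning sketch for the helper above: with
//   #pragma omp simd if(simd : cond)
// (OpenMP 5.0 and later) both a vectorized and a non-vectorized copy of the
// body are emitted and 'cond' selects between them at run time; without an
// if clause only the vectorized ThenGen path is generated.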
2516
2517static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
2518 PrePostActionTy &Action) {
2519 Action.Enter(CGF);
2520 assert(isOpenMPSimdDirective(S.getDirectiveKind()) &&
2521 "Expected simd directive");
2522 OMPLoopScope PreInitScope(CGF, S);
2523 // if (PreCond) {
2524 // for (IV in 0..LastIteration) BODY;
2525 // <Final counter/linear vars updates>;
2526 // }
2527 //
2528 if (isOpenMPDistributeDirective(S.getDirectiveKind()) ||
2529 isOpenMPWorksharingDirective(S.getDirectiveKind()) ||
2530 isOpenMPTaskLoopDirective(S.getDirectiveKind())) {
2531 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()));
2532 (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()));
2533 }
2534
2535 // Emit: if (PreCond) - begin.
2536 // If the condition constant folds and can be elided, avoid emitting the
2537 // whole loop.
2538 bool CondConstant;
2539 llvm::BasicBlock *ContBlock = nullptr;
2540 if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
2541 if (!CondConstant)
2542 return;
2543 } else {
2544 llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then");
2545 ContBlock = CGF.createBasicBlock(name: "simd.if.end");
2546 emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
2547 TrueCount: CGF.getProfileCount(&S));
2548 CGF.EmitBlock(BB: ThenBlock);
2549 CGF.incrementProfileCounter(&S);
2550 }
2551
2552 // Emit the loop iteration variable.
2553 const Expr *IVExpr = S.getIterationVariable();
2554 const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl());
2555 CGF.EmitVarDecl(D: *IVDecl);
2556 CGF.EmitIgnoredExpr(E: S.getInit());
2557
2558 // Emit the iterations count variable.
2559  // If it is not a variable, Sema decided to calculate the iterations count
2560  // on each iteration (e.g., it is foldable into a constant).
2561 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
2562 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
2563 // Emit calculation of the iterations count.
2564 CGF.EmitIgnoredExpr(E: S.getCalcLastIteration());
2565 }
2566
2567 emitAlignedClause(CGF, S);
2568 (void)CGF.EmitOMPLinearClauseInit(D: S);
2569 {
2570 CodeGenFunction::OMPPrivateScope LoopScope(CGF);
2571 CGF.EmitOMPPrivateClause(S, LoopScope);
2572 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
2573 CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
2574 CGF.EmitOMPReductionClauseInit(S, LoopScope);
2575 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
2576 CGF, S, CGF.EmitLValue(E: S.getIterationVariable()));
2577 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
2578 (void)LoopScope.Privatize();
2579 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
2580 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
2581
2582 emitCommonSimdLoop(
2583 CGF, S,
2584 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
2585 CGF.EmitOMPSimdInit(D: S);
2586 },
2587 BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2588 CGF.EmitOMPInnerLoop(
2589 S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
2590 [&S](CodeGenFunction &CGF) {
2591 emitOMPLoopBodyWithStopPoint(CGF, S,
2592 LoopExit: CodeGenFunction::JumpDest());
2593 },
2594 [](CodeGenFunction &) {});
2595 });
2596 CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2597 // Emit final copy of the lastprivate variables at the end of loops.
2598 if (HasLastprivateClause)
2599 CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true);
2600 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd);
2601 emitPostUpdateForReductionClause(CGF, S,
2602 [](CodeGenFunction &) { return nullptr; });
2603 LoopScope.restoreMap();
2604 CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; });
2605 }
2606 // Emit: if (PreCond) - end.
2607 if (ContBlock) {
2608 CGF.EmitBranch(Block: ContBlock);
2609 CGF.EmitBlock(BB: ContBlock, IsFinished: true);
2610 }
2611}
2612
2613static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) {
2614 // Check for unsupported clauses
2615 for (OMPClause *C : S.clauses()) {
2616    // Currently only order, simdlen, safelen and aligned clauses are supported.
2617 if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) ||
2618 isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C)))
2619 return false;
2620 }
2621
2622  // Check if we have a statement with the ordered directive.
2623  // Visit the statement hierarchy to find a compound statement
2624  // with an ordered directive in it.
2625 if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) {
2626 if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) {
2627 for (const Stmt *SubStmt : SyntacticalLoop->children()) {
2628 if (!SubStmt)
2629 continue;
2630 if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) {
2631 for (const Stmt *CSSubStmt : CS->children()) {
2632 if (!CSSubStmt)
2633 continue;
2634 if (isa<OMPOrderedDirective>(CSSubStmt)) {
2635 return false;
2636 }
2637 }
2638 }
2639 }
2640 }
2641 }
2642 return true;
2643}
2644static llvm::MapVector<llvm::Value *, llvm::Value *>
2645GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) {
2646 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars;
2647 for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) {
2648 llvm::APInt ClauseAlignment(64, 0);
2649 if (const Expr *AlignmentExpr = Clause->getAlignment()) {
2650 auto *AlignmentCI =
2651 cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr));
2652 ClauseAlignment = AlignmentCI->getValue();
2653 }
2654 for (const Expr *E : Clause->varlists()) {
2655 llvm::APInt Alignment(ClauseAlignment);
2656 if (Alignment == 0) {
2657 // OpenMP [2.8.1, Description]
2658 // If no optional parameter is specified, implementation-defined default
2659 // alignments for SIMD instructions on the target platforms are assumed.
2660 Alignment =
2661 CGF.getContext()
2662 .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign(
2663 E->getType()->getPointeeType()))
2664 .getQuantity();
2665 }
2666 assert((Alignment == 0 || Alignment.isPowerOf2()) &&
2667 "alignment is not power of 2");
2668 llvm::Value *PtrValue = CGF.EmitScalarExpr(E);
2669 AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue());
2670 }
2671 }
2672 return AlignedVars;
2673}
2674
2675void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
2676 bool UseOMPIRBuilder =
2677 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
2678 if (UseOMPIRBuilder) {
2679 auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF,
2680 PrePostActionTy &) {
2681 // Use the OpenMPIRBuilder if enabled.
2682 if (UseOMPIRBuilder) {
2683 llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars =
2684 GetAlignedMapping(S, CGF);
2685 // Emit the associated statement and get its loop representation.
2686 const Stmt *Inner = S.getRawStmt();
2687 llvm::CanonicalLoopInfo *CLI =
2688 EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2689
2690 llvm::OpenMPIRBuilder &OMPBuilder =
2691 CGM.getOpenMPRuntime().getOMPBuilder();
2692 // Add SIMD specific metadata
2693 llvm::ConstantInt *Simdlen = nullptr;
2694 if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) {
2695 RValue Len =
2696 this->EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(),
2697 /*ignoreResult=*/true);
2698 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2699 Simdlen = Val;
2700 }
2701 llvm::ConstantInt *Safelen = nullptr;
2702 if (const auto *C = S.getSingleClause<OMPSafelenClause>()) {
2703 RValue Len =
2704 this->EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(),
2705 /*ignoreResult=*/true);
2706 auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal());
2707 Safelen = Val;
2708 }
2709 llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown;
2710 if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
2711        if (C->getKind() == OpenMPOrderClauseKind::OMPC_ORDER_concurrent) {
2712 Order = llvm::omp::OrderKind::OMP_ORDER_concurrent;
2713 }
2714 }
2715      // Add simd metadata to the collapsed loop. Do not generate another loop
2716      // for the if clause; support for the if clause was handled earlier.
2717 OMPBuilder.applySimd(CLI, AlignedVars,
2718 /*IfCond*/ nullptr, Order, Simdlen, Safelen);
2719 return;
2720 }
2721 };
2722 {
2723 auto LPCRegion =
2724 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2725 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2726 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd,
2727 CodeGenIRBuilder);
2728 }
2729 return;
2730 }
2731
2732 ParentLoopDirectiveForScanRegion ScanRegion(*this, S);
2733 OMPFirstScanLoop = true;
2734 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
2735 emitOMPSimdRegion(CGF, S, Action);
2736 };
2737 {
2738 auto LPCRegion =
2739 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
2740 OMPLexicalScope Scope(*this, S, OMPD_unknown);
2741 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
2742 }
2743 // Check for outer lastprivate conditional update.
2744 checkForLastprivateConditionalUpdate(*this, S);
2745}
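
// Illustrative note (hypothetical input): for
//   #pragma omp simd simdlen(4) safelen(8) aligned(p : 32)
// the OMPIRBuilder path above passes the {p -> 32} aligned-vars map, a null
// 'if' condition, the order kind, simdlen 4 and safelen 8 to applySimd; the
// non-IRBuilder path instead emits the region through emitOMPSimdRegion.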
2746
2747void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) {
2748 // Emit the de-sugared statement.
2749 OMPTransformDirectiveScopeRAII TileScope(*this, &S);
2750 EmitStmt(S: S.getTransformedStmt());
2751}
2752
2753void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) {
2754 bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder;
2755
2756 if (UseOMPIRBuilder) {
2757 auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc());
2758 const Stmt *Inner = S.getRawStmt();
2759
2760 // Consume nested loop. Clear the entire remaining loop stack because a
2761 // fully unrolled loop is non-transformable. For partial unrolling the
2762 // generated outer loop is pushed back to the stack.
2763 llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
2764 OMPLoopNestStack.clear();
2765
2766 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
2767
2768 bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1;
2769 llvm::CanonicalLoopInfo *UnrolledCLI = nullptr;
2770
2771 if (S.hasClausesOfKind<OMPFullClause>()) {
2772 assert(ExpectedOMPLoopDepth == 0);
2773 OMPBuilder.unrollLoopFull(DL: DL, Loop: CLI);
2774 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2775 uint64_t Factor = 0;
2776 if (Expr *FactorExpr = PartialClause->getFactor()) {
2777 Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2778 assert(Factor >= 1 && "Only positive factors are valid");
2779 }
2780 OMPBuilder.unrollLoopPartial(DL: DL, Loop: CLI, Factor,
2781 UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr);
2782 } else {
2783 OMPBuilder.unrollLoopHeuristic(DL: DL, Loop: CLI);
2784 }
2785
2786 assert((!NeedsUnrolledCLI || UnrolledCLI) &&
2787 "NeedsUnrolledCLI implies UnrolledCLI to be set");
2788 if (UnrolledCLI)
2789 OMPLoopNestStack.push_back(Elt: UnrolledCLI);
2790
2791 return;
2792 }
2793
2794 // This function is only called if the unrolled loop is not consumed by any
2795 // other loop-associated construct. Such a loop-associated construct will have
2796 // used the transformed AST.
2797
2798 // Set the unroll metadata for the next emitted loop.
2799 LoopStack.setUnrollState(LoopAttributes::Enable);
2800
2801 if (S.hasClausesOfKind<OMPFullClause>()) {
2802 LoopStack.setUnrollState(LoopAttributes::Full);
2803 } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) {
2804 if (Expr *FactorExpr = PartialClause->getFactor()) {
2805 uint64_t Factor =
2806 FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue();
2807 assert(Factor >= 1 && "Only positive factors are valid");
2808 LoopStack.setUnrollCount(Factor);
2809 }
2810 }
2811
2812 EmitStmt(S: S.getAssociatedStmt());
2813}
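
// Illustrative note (hypothetical input): for
//   #pragma omp unroll partial(4)
// the OMPIRBuilder path calls unrollLoopPartial with Factor = 4, while the
// fallback path sets LoopAttributes::Enable plus an unroll count of 4 and
// leaves the actual transformation to LLVM's loop-unroll pass.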
2814
2815void CodeGenFunction::EmitOMPOuterLoop(
2816 bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
2817 CodeGenFunction::OMPPrivateScope &LoopScope,
2818 const CodeGenFunction::OMPLoopArguments &LoopArgs,
2819 const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
2820 const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
2821 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2822
2823 const Expr *IVExpr = S.getIterationVariable();
2824 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
2825 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2826
2827 JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end");
2828
2829 // Start the loop with a block that tests the condition.
2830 llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond");
2831 EmitBlock(BB: CondBlock);
2832 const SourceRange R = S.getSourceRange();
2833 OMPLoopNestStack.clear();
2834 LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()),
2835 EndLoc: SourceLocToDebugLoc(Location: R.getEnd()));
2836
2837 llvm::Value *BoolCondVal = nullptr;
2838 if (!DynamicOrOrdered) {
2839 // UB = min(UB, GlobalUB) or
2840 // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
2841 // 'distribute parallel for')
2842 EmitIgnoredExpr(E: LoopArgs.EUB);
2843 // IV = LB
2844 EmitIgnoredExpr(E: LoopArgs.Init);
2845 // IV < UB
2846 BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond);
2847 } else {
2848 BoolCondVal =
2849 RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL,
2850 LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST);
2851 }
2852
2853 // If there are any cleanups between here and the loop-exit scope,
2854 // create a block to stage a loop exit along.
2855 llvm::BasicBlock *ExitBlock = LoopExit.getBlock();
2856 if (LoopScope.requiresCleanups())
2857 ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup");
2858
2859 llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body");
2860 Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock);
2861 if (ExitBlock != LoopExit.getBlock()) {
2862 EmitBlock(BB: ExitBlock);
2863 EmitBranchThroughCleanup(Dest: LoopExit);
2864 }
2865 EmitBlock(BB: LoopBody);
2866
2867 // Emit "IV = LB" (in case of static schedule, we have already calculated new
2868 // LB for loop condition and emitted it above).
2869 if (DynamicOrOrdered)
2870 EmitIgnoredExpr(E: LoopArgs.Init);
2871
2872 // Create a block for the increment.
2873 JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc");
2874 BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue));
2875
2876 emitCommonSimdLoop(
2877 CGF&: *this, S,
2878 SimdInitGen: [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) {
2879 // Generate !llvm.loop.parallel metadata for loads and stores for loops
2880 // with dynamic/guided scheduling and without ordered clause.
2881 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
2882 CGF.LoopStack.setParallel(!IsMonotonic);
2883 if (const auto *C = S.getSingleClause<OMPOrderClause>())
2884 if (C->getKind() == OMPC_ORDER_concurrent)
2885 CGF.LoopStack.setParallel(/*Enable=*/true);
2886 } else {
2887 CGF.EmitOMPSimdInit(D: S);
2888 }
2889 },
2890 BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered,
2891 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
2892 SourceLocation Loc = S.getBeginLoc();
2893 // when 'distribute' is not combined with a 'for':
2894 // while (idx <= UB) { BODY; ++idx; }
2895 // when 'distribute' is combined with a 'for'
2896 // (e.g. 'distribute parallel for')
2897 // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
2898 CGF.EmitOMPInnerLoop(
2899 S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
2900 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
2901 CodeGenLoop(CGF, S, LoopExit);
2902 },
2903 [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
2904 CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
2905 });
2906 });
2907
2908 EmitBlock(BB: Continue.getBlock());
2909 BreakContinueStack.pop_back();
2910 if (!DynamicOrOrdered) {
2911 // Emit "LB = LB + Stride", "UB = UB + Stride".
2912 EmitIgnoredExpr(E: LoopArgs.NextLB);
2913 EmitIgnoredExpr(E: LoopArgs.NextUB);
2914 }
2915
2916 EmitBranch(Block: CondBlock);
2917 OMPLoopNestStack.clear();
2918 LoopStack.pop();
2919 // Emit the fall-through block.
2920 EmitBlock(BB: LoopExit.getBlock());
2921
2922 // Tell the runtime we are done.
2923 auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) {
2924 if (!DynamicOrOrdered)
2925 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
2926 LoopArgs.DKind);
2927 };
2928 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
2929}
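
// Sketch of the control flow emitted by EmitOMPOuterLoop (block names as
// created above):
//
//   omp.dispatch.cond:    // test for a(nother) chunk; static: EUB/Init/Cond,
//                         // dynamic/ordered: RT.emitForNext
//   omp.dispatch.body:    // inner loop over the current [LB, UB] chunk
//   omp.dispatch.inc:     // static: LB += ST, UB += ST; branch back to cond
//   omp.dispatch.end:     // exit; static schedules call emitForStaticFinish
//
// (omp.dispatch.cleanup is interposed before the exit when the loop scope
// requires cleanups.)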
2930
2931void CodeGenFunction::EmitOMPForOuterLoop(
2932 const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
2933 const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
2934 const OMPLoopArguments &LoopArgs,
2935 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
2936 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
2937
2938 // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
2939 const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule);
2940
2941 assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule,
2942 LoopArgs.Chunk != nullptr)) &&
2943 "static non-chunked schedule does not need outer loop");
2944
2945 // Emit outer loop.
2946 //
2947 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2948 // When schedule(dynamic,chunk_size) is specified, the iterations are
2949 // distributed to threads in the team in chunks as the threads request them.
2950 // Each thread executes a chunk of iterations, then requests another chunk,
2951 // until no chunks remain to be distributed. Each chunk contains chunk_size
2952 // iterations, except for the last chunk to be distributed, which may have
2953 // fewer iterations. When no chunk_size is specified, it defaults to 1.
2954 //
2955 // When schedule(guided,chunk_size) is specified, the iterations are assigned
2956 // to threads in the team in chunks as the executing threads request them.
2957 // Each thread executes a chunk of iterations, then requests another chunk,
2958 // until no chunks remain to be assigned. For a chunk_size of 1, the size of
2959 // each chunk is proportional to the number of unassigned iterations divided
2960 // by the number of threads in the team, decreasing to 1. For a chunk_size
2961 // with value k (greater than 1), the size of each chunk is determined in the
2962 // same way, with the restriction that the chunks do not contain fewer than k
2963 // iterations (except for the last chunk to be assigned, which may have fewer
2964 // than k iterations).
2965 //
2966 // When schedule(auto) is specified, the decision regarding scheduling is
2967 // delegated to the compiler and/or runtime system. The programmer gives the
2968 // implementation the freedom to choose any possible mapping of iterations to
2969 // threads in the team.
2970 //
2971 // When schedule(runtime) is specified, the decision regarding scheduling is
2972 // deferred until run time, and the schedule and chunk size are taken from the
2973 // run-sched-var ICV. If the ICV is set to auto, the schedule is
2974  // implementation defined.
2975 //
2976 // while(__kmpc_dispatch_next(&LB, &UB)) {
2977 // idx = LB;
2978 // while (idx <= UB) { BODY; ++idx;
2979 // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only.
2980 // } // inner loop
2981 // }
2982 //
2983 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
2984 // When schedule(static, chunk_size) is specified, iterations are divided into
2985 // chunks of size chunk_size, and the chunks are assigned to the threads in
2986 // the team in a round-robin fashion in the order of the thread number.
2987 //
2988 // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) {
2989 // while (idx <= UB) { BODY; ++idx; } // inner loop
2990 // LB = LB + ST;
2991 // UB = UB + ST;
2992 // }
2993 //
2994
2995 const Expr *IVExpr = S.getIterationVariable();
2996 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
2997 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
2998
2999 if (DynamicOrOrdered) {
3000 const std::pair<llvm::Value *, llvm::Value *> DispatchBounds =
3001 CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
3002 llvm::Value *LBVal = DispatchBounds.first;
3003 llvm::Value *UBVal = DispatchBounds.second;
3004    CGOpenMPRuntime::DispatchRTInput DispatchRTInputValues = {LBVal, UBVal,
3005                                                              LoopArgs.Chunk};
3006    RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize,
3007                           IVSigned, Ordered, DispatchValues: DispatchRTInputValues);
3008 } else {
3009 CGOpenMPRuntime::StaticRTInput StaticInit(
3010 IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
3011 LoopArgs.ST, LoopArgs.Chunk);
3012 RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(),
3013 ScheduleKind, StaticInit);
3014 }
3015
3016 auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
3017 const unsigned IVSize,
3018 const bool IVSigned) {
3019 if (Ordered) {
3020 CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
3021 IVSigned);
3022 }
3023 };
3024
3025 OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
3026 LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
3027 OuterLoopArgs.IncExpr = S.getInc();
3028 OuterLoopArgs.Init = S.getInit();
3029 OuterLoopArgs.Cond = S.getCond();
3030 OuterLoopArgs.NextLB = S.getNextLowerBound();
3031 OuterLoopArgs.NextUB = S.getNextUpperBound();
3032 OuterLoopArgs.DKind = LoopArgs.DKind;
3033 EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs,
3034 CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
3035}
3036
3037static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
3038 const unsigned IVSize, const bool IVSigned) {}
3039
3040void CodeGenFunction::EmitOMPDistributeOuterLoop(
3041 OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
3042 OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
3043 const CodeGenLoopTy &CodeGenLoopContent) {
3044
3045 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3046
3047 // Emit outer loop.
3048  // Same behavior as an OMPForOuterLoop, except that the schedule cannot be
3049  // dynamic.
3050 //
3051
3052 const Expr *IVExpr = S.getIterationVariable();
3053 const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType());
3054 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3055
3056 CGOpenMPRuntime::StaticRTInput StaticInit(
3057 IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB,
3058 LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk);
3059 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit);
3060
3061  // For combined 'distribute' and 'for', the increment expression of
3062  // 'distribute' is stored in DistInc. For 'distribute' alone, it is in Inc.
3063 Expr *IncExpr;
3064 if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
3065 IncExpr = S.getDistInc();
3066 else
3067 IncExpr = S.getInc();
3068
3069  // This routine is shared by 'omp distribute parallel for' and
3070  // 'omp distribute': select the right EUB expression depending on the
3071  // directive.
3072 OMPLoopArguments OuterLoopArgs;
3073 OuterLoopArgs.LB = LoopArgs.LB;
3074 OuterLoopArgs.UB = LoopArgs.UB;
3075 OuterLoopArgs.ST = LoopArgs.ST;
3076 OuterLoopArgs.IL = LoopArgs.IL;
3077 OuterLoopArgs.Chunk = LoopArgs.Chunk;
3078 OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3079 ? S.getCombinedEnsureUpperBound()
3080 : S.getEnsureUpperBound();
3081 OuterLoopArgs.IncExpr = IncExpr;
3082 OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3083 ? S.getCombinedInit()
3084 : S.getInit();
3085 OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3086 ? S.getCombinedCond()
3087 : S.getCond();
3088 OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3089 ? S.getCombinedNextLowerBound()
3090 : S.getNextLowerBound();
3091 OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
3092 ? S.getCombinedNextUpperBound()
3093 : S.getNextUpperBound();
3094 OuterLoopArgs.DKind = OMPD_distribute;
3095
3096 EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
3097 LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent,
3098 CodeGenOrdered: emitEmptyOrdered);
3099}
3100
3101static std::pair<LValue, LValue>
3102emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
3103 const OMPExecutableDirective &S) {
3104 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3105 LValue LB =
3106 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3107 LValue UB =
3108 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3109
3110 // When composing 'distribute' with 'for' (e.g. as in 'distribute
3111 // parallel for') we need to use the 'distribute'
3112 // chunk lower and upper bounds rather than the whole loop iteration
3113  // space. These are parameters to the outlined function for 'parallel'
3114  // and we copy the bounds of the previous schedule into
3115  // the current ones.
3116 LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable());
3117 LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable());
3118 llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(
3119 lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3120 PrevLBVal = CGF.EmitScalarConversion(
3121 Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(),
3122 DstTy: LS.getIterationVariable()->getType(),
3123 Loc: LS.getPrevLowerBoundVariable()->getExprLoc());
3124 llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(
3125 lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3126 PrevUBVal = CGF.EmitScalarConversion(
3127 Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(),
3128 DstTy: LS.getIterationVariable()->getType(),
3129 Loc: LS.getPrevUpperBoundVariable()->getExprLoc());
3130
3131 CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB);
3132 CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB);
3133
3134 return {LB, UB};
3135}
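
// Illustrative note (hypothetical example): in
//   #pragma omp distribute parallel for
// each team receives its distribute chunk [PrevLB, PrevUB] as parameters of
// the 'parallel' outlined function; the code above converts those values to
// the iteration-variable type and stores them into the inner worksharing
// loop's LB and UB, so the 'for' only schedules iterations from that chunk.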
3136
3137/// If the 'for' loop has a dispatch schedule (e.g. dynamic, guided), then
3138/// we need to use the LB and UB expressions generated by the worksharing
3139/// code generation support, whereas in non-combined situations we would
3140/// just emit 0 and the LastIteration expression.
3141/// This function is necessary due to the difference of the LB and UB
3142/// types for the RT emission routines 'for_static_init' and
3143/// 'for_dispatch_init'.
3144static std::pair<llvm::Value *, llvm::Value *>
3145emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
3146 const OMPExecutableDirective &S,
3147 Address LB, Address UB) {
3148 const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S);
3149 const Expr *IVExpr = LS.getIterationVariable();
3150  // When implementing a dynamic schedule for a 'for' combined with a
3151  // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
3152  // is not normalized, as each team only executes its own assigned
3153  // distribute chunk.
3154 QualType IteratorTy = IVExpr->getType();
3155 llvm::Value *LBVal =
3156 CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3157 llvm::Value *UBVal =
3158 CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc());
3159 return {LBVal, UBVal};
3160}
3161
3162static void emitDistributeParallelForDistributeInnerBoundParams(
3163 CodeGenFunction &CGF, const OMPExecutableDirective &S,
3164 llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
3165 const auto &Dir = cast<OMPLoopDirective>(Val: S);
3166 LValue LB =
3167 CGF.EmitLValue(cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable()));
3168 llvm::Value *LBCast =
3169 CGF.Builder.CreateIntCast(V: CGF.Builder.CreateLoad(Addr: LB.getAddress(CGF)),
3170 DestTy: CGF.SizeTy, /*isSigned=*/false);
3171 CapturedVars.push_back(Elt: LBCast);
3172 LValue UB =
3173 CGF.EmitLValue(cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable()));
3174
3175 llvm::Value *UBCast =
3176 CGF.Builder.CreateIntCast(V: CGF.Builder.CreateLoad(Addr: UB.getAddress(CGF)),
3177 DestTy: CGF.SizeTy, /*isSigned=*/false);
3178 CapturedVars.push_back(Elt: UBCast);
3179}
3180
3181static void
3182emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
3183 const OMPLoopDirective &S,
3184 CodeGenFunction::JumpDest LoopExit) {
3185 auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
3186 PrePostActionTy &Action) {
3187 Action.Enter(CGF);
3188 bool HasCancel = false;
3189 if (!isOpenMPSimdDirective(S.getDirectiveKind())) {
3190 if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S))
3191 HasCancel = D->hasCancel();
3192 else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S))
3193 HasCancel = D->hasCancel();
3194 else if (const auto *D =
3195 dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S))
3196 HasCancel = D->hasCancel();
3197 }
3198 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3199 HasCancel);
3200 CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(),
3201 CodeGenLoopBounds: emitDistributeParallelForInnerBounds,
3202 CGDispatchBounds: emitDistributeParallelForDispatchBounds);
3203 };
3204
3205 emitCommonOMPParallelDirective(
3206 CGF, S,
3207 isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for,
3208 CGInlinedWorksharingLoop,
3209 emitDistributeParallelForDistributeInnerBoundParams);
3210}
3211
3212void CodeGenFunction::EmitOMPDistributeParallelForDirective(
3213 const OMPDistributeParallelForDirective &S) {
3214 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3215 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3216 IncExpr: S.getDistInc());
3217 };
3218 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3219 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3220}
3221
3222void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
3223 const OMPDistributeParallelForSimdDirective &S) {
3224 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3225 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined,
3226 IncExpr: S.getDistInc());
3227 };
3228 OMPLexicalScope Scope(*this, S, OMPD_parallel);
3229 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
3230}
3231
3232void CodeGenFunction::EmitOMPDistributeSimdDirective(
3233 const OMPDistributeSimdDirective &S) {
3234 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3235 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
3236 };
3237 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3238 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3239}
3240
3241void CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
3242 CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) {
3243  // Emit SPMD target simd region as a standalone region.
3244 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3245 emitOMPSimdRegion(CGF, S, Action);
3246 };
3247 llvm::Function *Fn;
3248 llvm::Constant *Addr;
3249 // Emit target region as a standalone region.
3250 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
3251 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
3252 assert(Fn && Addr && "Target device function emission failed.");
3253}
3254
3255void CodeGenFunction::EmitOMPTargetSimdDirective(
3256 const OMPTargetSimdDirective &S) {
3257 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
3258 emitOMPSimdRegion(CGF, S, Action);
3259 };
3260 emitCommonOMPTargetDirective(*this, S, CodeGen);
3261}
3262
3263namespace {
3264struct ScheduleKindModifiersTy {
3265 OpenMPScheduleClauseKind Kind;
3266 OpenMPScheduleClauseModifier M1;
3267 OpenMPScheduleClauseModifier M2;
3268 ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind,
3269 OpenMPScheduleClauseModifier M1,
3270 OpenMPScheduleClauseModifier M2)
3271 : Kind(Kind), M1(M1), M2(M2) {}
3272};
3273} // namespace
3274
3275bool CodeGenFunction::EmitOMPWorksharingLoop(
3276 const OMPLoopDirective &S, Expr *EUB,
3277 const CodeGenLoopBoundsTy &CodeGenLoopBounds,
3278 const CodeGenDispatchBoundsTy &CGDispatchBounds) {
3279 // Emit the loop iteration variable.
3280 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
3281 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
3282 EmitVarDecl(D: *IVDecl);
3283
3284 // Emit the iterations count variable.
3285  // If it is not a variable, Sema decided to calculate the iteration count on
3286  // each use (e.g., it is foldable into a constant).
3287 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
3288 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
3289 // Emit calculation of the iterations count.
3290 EmitIgnoredExpr(E: S.getCalcLastIteration());
3291 }
3292
3293 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
3294
3295 bool HasLastprivateClause;
3296 // Check pre-condition.
3297 {
3298 OMPLoopScope PreInitScope(*this, S);
3299 // Skip the entire loop if we don't meet the precondition.
3300 // If the condition constant folds and can be elided, avoid emitting the
3301 // whole loop.
3302 bool CondConstant;
3303 llvm::BasicBlock *ContBlock = nullptr;
3304 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
3305 if (!CondConstant)
3306 return false;
3307 } else {
3308 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
3309 ContBlock = createBasicBlock(name: "omp.precond.end");
3310 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
3311 TrueCount: getProfileCount(&S));
3312 EmitBlock(BB: ThenBlock);
3313 incrementProfileCounter(&S);
3314 }
3315
3316 RunCleanupsScope DoacrossCleanupScope(*this);
3317 bool Ordered = false;
3318 if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) {
3319 if (OrderedClause->getNumForLoops())
3320 RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations());
3321 else
3322 Ordered = true;
3323 }
3324
3325 llvm::DenseSet<const Expr *> EmittedFinals;
3326 emitAlignedClause(*this, S);
3327 bool HasLinears = EmitOMPLinearClauseInit(D: S);
3328 // Emit helper vars inits.
3329
3330 std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
3331 LValue LB = Bounds.first;
3332 LValue UB = Bounds.second;
3333 LValue ST =
3334 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
3335 LValue IL =
3336 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
3337
3338 // Emit 'then' code.
3339 {
3340 OMPPrivateScope LoopScope(*this);
3341 if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) {
3342 // Emit implicit barrier to synchronize threads and avoid data races on
3343 // initialization of firstprivate variables and post-update of
3344 // lastprivate variables.
3345 CGM.getOpenMPRuntime().emitBarrierCall(
3346 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3347 /*ForceSimpleCall=*/true);
3348 }
3349 EmitOMPPrivateClause(S, LoopScope);
3350 CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(
3351 *this, S, EmitLValue(E: S.getIterationVariable()));
3352 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
3353 EmitOMPReductionClauseInit(S, LoopScope);
3354 EmitOMPPrivateLoopCounters(S, LoopScope);
3355 EmitOMPLinearClause(D: S, PrivateScope&: LoopScope);
3356 (void)LoopScope.Privatize();
3357 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
3358 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
3359
3360 // Detect the loop schedule kind and chunk.
3361 const Expr *ChunkExpr = nullptr;
3362 OpenMPScheduleTy ScheduleKind;
3363 if (const auto *C = S.getSingleClause<OMPScheduleClause>()) {
3364 ScheduleKind.Schedule = C->getScheduleKind();
3365 ScheduleKind.M1 = C->getFirstScheduleModifier();
3366 ScheduleKind.M2 = C->getSecondScheduleModifier();
3367 ChunkExpr = C->getChunkSize();
3368 } else {
3369 // Default behaviour for schedule clause.
3370 CGM.getOpenMPRuntime().getDefaultScheduleAndChunk(
3371 CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr);
3372 }
3373 bool HasChunkSizeOne = false;
3374 llvm::Value *Chunk = nullptr;
3375 if (ChunkExpr) {
3376 Chunk = EmitScalarExpr(E: ChunkExpr);
3377 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(),
3378 DstTy: S.getIterationVariable()->getType(),
3379 Loc: S.getBeginLoc());
3380 Expr::EvalResult Result;
3381 if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) {
3382 llvm::APSInt EvaluatedChunk = Result.Val.getInt();
3383 HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1);
3384 }
3385 }
3386 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
3387 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
3388 // OpenMP 4.5, 2.7.1 Loop Construct, Description.
3389 // If the static schedule kind is specified or if the ordered clause is
3390 // specified, and if no monotonic modifier is specified, the effect will
3391 // be as if the monotonic modifier was specified.
3392 bool StaticChunkedOne =
3393 RT.isStaticChunked(ScheduleKind.Schedule,
3394 /* Chunked */ Chunk != nullptr) &&
3395 HasChunkSizeOne &&
3396 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
3397 bool IsMonotonic =
3398 Ordered ||
3399 (ScheduleKind.Schedule == OMPC_SCHEDULE_static &&
3400 !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic ||
3401 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) ||
3402 ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic ||
3403 ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
3404 if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule,
3405 /* Chunked */ Chunk != nullptr) ||
3406 StaticChunkedOne) &&
3407 !Ordered) {
3408 JumpDest LoopExit =
3409 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
3410 emitCommonSimdLoop(
3411 *this, S,
3412 [&S](CodeGenFunction &CGF, PrePostActionTy &) {
3413 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3414 CGF.EmitOMPSimdInit(D: S);
3415 } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) {
3416 if (C->getKind() == OMPC_ORDER_concurrent)
3417 CGF.LoopStack.setParallel(/*Enable=*/true);
3418 }
3419 },
3420 [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk,
3421 &S, ScheduleKind, LoopExit,
3422 &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
3423 // OpenMP [2.7.1, Loop Construct, Description, table 2-1]
3424 // When no chunk_size is specified, the iteration space is divided
3425 // into chunks that are approximately equal in size, and at most
3426 // one chunk is distributed to each thread. Note that the size of
3427 // the chunks is unspecified in this case.
3428 CGOpenMPRuntime::StaticRTInput StaticInit(
3429 IVSize, IVSigned, Ordered, IL.getAddress(CGF),
3430 LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF),
3431 StaticChunkedOne ? Chunk : nullptr);
3432 CGF.CGM.getOpenMPRuntime().emitForStaticInit(
3433 CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind,
3434 StaticInit);
3435 // UB = min(UB, GlobalUB);
3436 if (!StaticChunkedOne)
3437 CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound());
3438 // IV = LB;
3439 CGF.EmitIgnoredExpr(E: S.getInit());
3440 // For unchunked static schedule generate:
3441 //
3442 // while (idx <= UB) {
3443 // BODY;
3444 // ++idx;
3445 // }
3446 //
3447 // For static schedule with chunk one:
3448 //
3449 // while (IV <= PrevUB) {
3450 // BODY;
3451 // IV += ST;
3452 // }
3453 CGF.EmitOMPInnerLoop(
3454 S, LoopScope.requiresCleanups(),
3455 StaticChunkedOne ? S.getCombinedParForInDistCond()
3456 : S.getCond(),
3457 StaticChunkedOne ? S.getDistInc() : S.getInc(),
3458 [&S, LoopExit](CodeGenFunction &CGF) {
3459 emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit);
3460 },
3461 [](CodeGenFunction &) {});
3462 });
3463 EmitBlock(BB: LoopExit.getBlock());
3464 // Tell the runtime we are done.
3465 auto &&CodeGen = [&S](CodeGenFunction &CGF) {
3466 CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
3467 OMPD_for);
3468 };
3469 OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen);
3470 } else {
3471 // Emit the outer loop, which requests its work chunk [LB..UB] from
3472 // runtime and runs the inner loop to process it.
3473 OMPLoopArguments LoopArguments(
3474 LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this),
3475 IL.getAddress(CGF&: *this), Chunk, EUB);
3476 LoopArguments.DKind = OMPD_for;
3477 EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
3478 LoopArgs: LoopArguments, CGDispatchBounds);
3479 }
3480 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
3481 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
3482 return CGF.Builder.CreateIsNotNull(
3483 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3484 });
3485 }
3486 EmitOMPReductionClauseFinal(
3487 S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind())
3488 ? /*Parallel and Simd*/ OMPD_parallel_for_simd
3489 : /*Parallel only*/ OMPD_parallel);
3490 // Emit post-update of the reduction variables if IsLastIter != 0.
3491 emitPostUpdateForReductionClause(
3492 *this, S, [IL, &S](CodeGenFunction &CGF) {
3493 return CGF.Builder.CreateIsNotNull(
3494 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3495 });
3496 // Emit final copy of the lastprivate variables if IsLastIter != 0.
3497 if (HasLastprivateClause)
3498 EmitOMPLastprivateClauseFinal(
3499 S, isOpenMPSimdDirective(S.getDirectiveKind()),
3500 Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc())));
3501 LoopScope.restoreMap();
3502 EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) {
3503 return CGF.Builder.CreateIsNotNull(
3504 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
3505 });
3506 }
3507 DoacrossCleanupScope.ForceCleanup();
3508 // We're now done with the loop, so jump to the continuation block.
3509 if (ContBlock) {
3510 EmitBranch(Block: ContBlock);
3511 EmitBlock(BB: ContBlock, /*IsFinished=*/true);
3512 }
3513 }
3514 return HasLastprivateClause;
3515}
3516
3517/// The following two functions generate expressions for the loop lower
3518/// and upper bounds in case of static and dynamic (dispatch) schedule
3519/// of the associated 'for' or 'distribute' loop.
3520static std::pair<LValue, LValue>
3521emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
3522 const auto &LS = cast<OMPLoopDirective>(Val: S);
3523 LValue LB =
3524 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable()));
3525 LValue UB =
3526 EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable()));
3527 return {LB, UB};
3528}
3529
3530/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
3531/// consider the lower and upper bound expressions generated by the
3532/// worksharing loop support, but we use 0 and the iteration space size as
3533/// constants.
3534static std::pair<llvm::Value *, llvm::Value *>
3535emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
3536 Address LB, Address UB) {
3537 const auto &LS = cast<OMPLoopDirective>(Val: S);
3538 const Expr *IVExpr = LS.getIterationVariable();
3539 const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType());
3540 llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0);
3541 llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration());
3542 return {LBVal, UBVal};
3543}
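
// Illustrative note: for dispatch schedules the runtime is thus initialized
// with the full normalized iteration space, LB = 0 and UB = LastIteration,
// rather than with the LB/UB helper variables that emitForLoopBounds
// prepares for the static-schedule path.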
3544
3545/// Emits internal temp array declarations for the directive with inscan
3546/// reductions.
3547/// The code is the following:
3548/// \code
3549/// size num_iters = <num_iters>;
3550/// <type> buffer[num_iters];
3551/// \endcode
3552static void emitScanBasedDirectiveDecls(
3553 CodeGenFunction &CGF, const OMPLoopDirective &S,
3554 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3555 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3556 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3557 SmallVector<const Expr *, 4> Shareds;
3558 SmallVector<const Expr *, 4> Privates;
3559 SmallVector<const Expr *, 4> ReductionOps;
3560 SmallVector<const Expr *, 4> CopyArrayTemps;
3561 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3562 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3563 "Only inscan reductions are expected.");
3564 Shareds.append(C->varlist_begin(), C->varlist_end());
3565 Privates.append(C->privates().begin(), C->privates().end());
3566 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3567 CopyArrayTemps.append(C->copy_array_temps().begin(),
3568 C->copy_array_temps().end());
3569 }
3570 {
3571    // Emit buffers for each reduction variable.
3572    // ReductionCodeGen is required to correctly emit the code for array
3573    // reductions.
3574 ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps);
3575 unsigned Count = 0;
3576 auto *ITA = CopyArrayTemps.begin();
3577 for (const Expr *IRef : Privates) {
3578 const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl());
3579 // Emit variably modified arrays, used for arrays/array sections
3580 // reductions.
3581 if (PrivateVD->getType()->isVariablyModifiedType()) {
3582 RedCG.emitSharedOrigLValue(CGF, N: Count);
3583 RedCG.emitAggregateType(CGF, N: Count);
3584 }
3585 CodeGenFunction::OpaqueValueMapping DimMapping(
3586 CGF,
3587 cast<OpaqueValueExpr>(
3588 cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe())
3589 ->getSizeExpr()),
3590 RValue::get(V: OMPScanNumIterations));
3591 // Emit temp buffer.
3592 CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl()));
3593 ++ITA;
3594 ++Count;
3595 }
3596 }
3597}
3598
3599/// Copies final inscan reduction values to the original variables.
3600/// The code is the following:
3601/// \code
3602/// <orig_var> = buffer[num_iters-1];
3603/// \endcode
3604static void emitScanBasedDirectiveFinals(
3605 CodeGenFunction &CGF, const OMPLoopDirective &S,
3606 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) {
3607 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3608 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3609 SmallVector<const Expr *, 4> Shareds;
3610 SmallVector<const Expr *, 4> LHSs;
3611 SmallVector<const Expr *, 4> RHSs;
3612 SmallVector<const Expr *, 4> Privates;
3613 SmallVector<const Expr *, 4> CopyOps;
3614 SmallVector<const Expr *, 4> CopyArrayElems;
3615 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3616 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3617 "Only inscan reductions are expected.");
3618 Shareds.append(C->varlist_begin(), C->varlist_end());
3619 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3620 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3621 Privates.append(C->privates().begin(), C->privates().end());
3622 CopyOps.append(C->copy_ops().begin(), C->copy_ops().end());
3623 CopyArrayElems.append(C->copy_array_elems().begin(),
3624 C->copy_array_elems().end());
3625 }
3626 // Create temp var and copy LHS value to this temp value.
3627 // LHS = TMP[LastIter];
3628 llvm::Value *OMPLast = CGF.Builder.CreateNSWSub(
3629 LHS: OMPScanNumIterations,
3630 RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false));
3631 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
3632 const Expr *PrivateExpr = Privates[I];
3633 const Expr *OrigExpr = Shareds[I];
3634 const Expr *CopyArrayElem = CopyArrayElems[I];
3635 CodeGenFunction::OpaqueValueMapping IdxMapping(
3636 CGF,
3637 cast<OpaqueValueExpr>(
3638 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3639 RValue::get(V: OMPLast));
3640 LValue DestLVal = CGF.EmitLValue(E: OrigExpr);
3641 LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem);
3642 CGF.EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF),
3643 SrcAddr: SrcLVal.getAddress(CGF),
3644 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
3645 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
3646 Copy: CopyOps[I]);
3647 }
3648}
3649
3650/// Emits the code for the directive with inscan reductions.
3651/// The code is the following:
3652/// \code
3653/// #pragma omp ...
3654/// for (i: 0..<num_iters>) {
3655/// <input phase>;
3656/// buffer[i] = red;
3657/// }
3658/// #pragma omp master // in parallel region
3659/// for (int k = 0; k != ceil(log2(num_iters)); ++k)
3660/// for (size cnt = last_iter; cnt >= pow(2, k); --cnt)
3661/// buffer[cnt] op= buffer[cnt-pow(2,k)];
3662/// #pragma omp barrier // in parallel region
3663/// #pragma omp ...
3664/// for (0..<num_iters>) {
3665/// red = InclusiveScan ? buffer[i] : buffer[i-1];
3666/// <scan phase>;
3667/// }
3668/// \endcode
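///
/// For example (illustrative), with num_iters = 8 the prefix-reduction loop
/// runs ceil(log2(8)) = 3 rounds with pow2k = 1, 2, 4; because each round
/// updates buffer[i] from high to low indices, buffer[i] afterwards holds the
/// reduction of elements 0..i, i.e. an inclusive scan.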
3669static void emitScanBasedDirective(
3670 CodeGenFunction &CGF, const OMPLoopDirective &S,
3671 llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen,
3672 llvm::function_ref<void(CodeGenFunction &)> FirstGen,
3673 llvm::function_ref<void(CodeGenFunction &)> SecondGen) {
3674 llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast(
3675 V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false);
3676 SmallVector<const Expr *, 4> Privates;
3677 SmallVector<const Expr *, 4> ReductionOps;
3678 SmallVector<const Expr *, 4> LHSs;
3679 SmallVector<const Expr *, 4> RHSs;
3680 SmallVector<const Expr *, 4> CopyArrayElems;
3681 for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
3682 assert(C->getModifier() == OMPC_REDUCTION_inscan &&
3683 "Only inscan reductions are expected.");
3684 Privates.append(C->privates().begin(), C->privates().end());
3685 ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end());
3686 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
3687 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
3688 CopyArrayElems.append(C->copy_array_elems().begin(),
3689 C->copy_array_elems().end());
3690 }
3691 CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S);
3692 {
3693 // Emit loop with input phase:
3694 // #pragma omp ...
3695 // for (i: 0..<num_iters>) {
3696 // <input phase>;
3697 // buffer[i] = red;
3698 // }
3699 CGF.OMPFirstScanLoop = true;
3700 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3701 FirstGen(CGF);
3702 }
3703 // #pragma omp barrier // in parallel region
3704 auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems,
3705 &ReductionOps,
3706 &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) {
3707 Action.Enter(CGF);
3708 // Emit prefix reduction:
3709 // #pragma omp master // in parallel region
3710    // for (int k = 0; k != ceil(log2(n)); ++k)
3711 llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock();
3712 llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body");
3713 llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit");
3714 llvm::Function *F =
3715 CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy);
3716 llvm::Value *Arg =
3717 CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy);
3718 llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg);
3719 F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy);
3720 LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal);
3721 LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy);
3722 llvm::Value *NMin1 = CGF.Builder.CreateNUWSub(
3723 LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3724 auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc());
3725 CGF.EmitBlock(BB: LoopBB);
3726 auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2);
3727 // size pow2k = 1;
3728 auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3729 Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB);
3730 Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB);
3731 // for (size i = n - 1; i >= 2 ^ k; --i)
3732 // tmp[i] op= tmp[i-pow2k];
3733 llvm::BasicBlock *InnerLoopBB =
3734 CGF.createBasicBlock(name: "omp.inner.log.scan.body");
3735 llvm::BasicBlock *InnerExitBB =
3736 CGF.createBasicBlock(name: "omp.inner.log.scan.exit");
3737 llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K);
3738 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3739 CGF.EmitBlock(BB: InnerLoopBB);
3740 auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2);
3741 IVal->addIncoming(V: NMin1, BB: LoopBB);
3742 {
3743 CodeGenFunction::OMPPrivateScope PrivScope(CGF);
3744 auto *ILHS = LHSs.begin();
3745 auto *IRHS = RHSs.begin();
3746 for (const Expr *CopyArrayElem : CopyArrayElems) {
3747 const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl());
3748 const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl());
3749 Address LHSAddr = Address::invalid();
3750 {
3751 CodeGenFunction::OpaqueValueMapping IdxMapping(
3752 CGF,
3753 cast<OpaqueValueExpr>(
3754 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3755 RValue::get(V: IVal));
3756 LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(CGF);
3757 }
3758 PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr);
3759 Address RHSAddr = Address::invalid();
3760 {
3761 llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K);
3762 CodeGenFunction::OpaqueValueMapping IdxMapping(
3763 CGF,
3764 cast<OpaqueValueExpr>(
3765 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
3766 RValue::get(V: OffsetIVal));
3767 RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(CGF);
3768 }
3769 PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr);
3770 ++ILHS;
3771 ++IRHS;
3772 }
3773 PrivScope.Privatize();
3774 CGF.CGM.getOpenMPRuntime().emitReduction(
3775 CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
3776 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown});
3777 }
3778 llvm::Value *NextIVal =
3779 CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1));
3780 IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock());
3781 CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K);
3782 CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB);
3783 CGF.EmitBlock(BB: InnerExitBB);
3784 llvm::Value *Next =
3785 CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1));
3786 Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock());
3787 // pow2k <<= 1;
3788 llvm::Value *NextPow2K =
3789 CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "", /*HasNUW=*/true);
3790 Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock());
3791 llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal);
3792 CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB);
3793 auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc());
3794 CGF.EmitBlock(BB: ExitBB);
3795 };
3796 if (isOpenMPParallelDirective(S.getDirectiveKind())) {
3797 CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc());
3798 CGF.CGM.getOpenMPRuntime().emitBarrierCall(
3799 CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
3800 /*ForceSimpleCall=*/true);
3801 } else {
3802 RegionCodeGenTy RCG(CodeGen);
3803 RCG(CGF);
3804 }
3805
3806 CGF.OMPFirstScanLoop = false;
3807 SecondGen(CGF);
3808}
3809
3810static bool emitWorksharingDirective(CodeGenFunction &CGF,
3811 const OMPLoopDirective &S,
3812 bool HasCancel) {
3813 bool HasLastprivates;
3814 if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
3815 [](const OMPReductionClause *C) {
3816 return C->getModifier() == OMPC_REDUCTION_inscan;
3817 })) {
3818 const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
3819 CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
3820 OMPLoopScope LoopScope(CGF, S);
3821 return CGF.EmitScalarExpr(E: S.getNumIterations());
3822 };
3823 const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) {
3824 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3825 CGF, S.getDirectiveKind(), HasCancel);
3826 (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3827 CodeGenLoopBounds: emitForLoopBounds,
3828 CGDispatchBounds: emitDispatchForLoopBounds);
3829 // Emit an implicit barrier at the end.
3830 CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(),
3831 OMPD_for);
3832 };
3833 const auto &&SecondGen = [&S, HasCancel,
3834 &HasLastprivates](CodeGenFunction &CGF) {
3835 CodeGenFunction::OMPCancelStackRAII CancelRegion(
3836 CGF, S.getDirectiveKind(), HasCancel);
3837 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3838 CodeGenLoopBounds: emitForLoopBounds,
3839 CGDispatchBounds: emitDispatchForLoopBounds);
3840 };
3841 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3842 emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen);
3843 emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen);
3844 if (!isOpenMPParallelDirective(S.getDirectiveKind()))
3845 emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen);
3846 } else {
3847 CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(),
3848 HasCancel);
3849 HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(),
3850 CodeGenLoopBounds: emitForLoopBounds,
3851 CGDispatchBounds: emitDispatchForLoopBounds);
3852 }
3853 return HasLastprivates;
3854}
3855
3856static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) {
3857 if (S.hasCancel())
3858 return false;
3859 for (OMPClause *C : S.clauses()) {
3860 if (isa<OMPNowaitClause>(C))
3861 continue;
3862
3863 if (auto *SC = dyn_cast<OMPScheduleClause>(C)) {
3864 if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3865 return false;
3866 if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown)
3867 return false;
3868 switch (SC->getScheduleKind()) {
3869 case OMPC_SCHEDULE_auto:
3870 case OMPC_SCHEDULE_dynamic:
3871 case OMPC_SCHEDULE_runtime:
3872 case OMPC_SCHEDULE_guided:
3873 case OMPC_SCHEDULE_static:
3874 continue;
3875 case OMPC_SCHEDULE_unknown:
3876 return false;
3877 }
3878 }
3879
3880 return false;
3881 }
3882
3883 return true;
3884}
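
// Illustrative note (hypothetical examples): under this predicate
//   #pragma omp for nowait schedule(dynamic)
// is handled by the OMPIRBuilder path, whereas a schedule modifier such as
//   #pragma omp for schedule(monotonic : dynamic)
// or any other clause (e.g. reduction, lastprivate) makes the function return
// false and codegen falls back to the classic path.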
3885
3886static llvm::omp::ScheduleKind
3887convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) {
3888 switch (ScheduleClauseKind) {
3889 case OMPC_SCHEDULE_unknown:
3890 return llvm::omp::OMP_SCHEDULE_Default;
3891 case OMPC_SCHEDULE_auto:
3892 return llvm::omp::OMP_SCHEDULE_Auto;
3893 case OMPC_SCHEDULE_dynamic:
3894 return llvm::omp::OMP_SCHEDULE_Dynamic;
3895 case OMPC_SCHEDULE_guided:
3896 return llvm::omp::OMP_SCHEDULE_Guided;
3897 case OMPC_SCHEDULE_runtime:
3898 return llvm::omp::OMP_SCHEDULE_Runtime;
3899 case OMPC_SCHEDULE_static:
3900 return llvm::omp::OMP_SCHEDULE_Static;
3901 }
3902 llvm_unreachable("Unhandled schedule kind");
3903}
3904
3905void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
3906 bool HasLastprivates = false;
3907 bool UseOMPIRBuilder =
3908 CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S);
3909 auto &&CodeGen = [this, &S, &HasLastprivates,
3910 UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) {
3911 // Use the OpenMPIRBuilder if enabled.
3912 if (UseOMPIRBuilder) {
3913 bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>();
3914
3915 llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default;
3916 llvm::Value *ChunkSize = nullptr;
3917 if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) {
3918 SchedKind =
3919 convertClauseKindToSchedKind(SchedClause->getScheduleKind());
3920 if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize())
3921 ChunkSize = EmitScalarExpr(E: ChunkSizeExpr);
3922 }
3923
3924 // Emit the associated statement and get its loop representation.
3925 const Stmt *Inner = S.getRawStmt();
3926 llvm::CanonicalLoopInfo *CLI =
3927 EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1);
3928
3929 llvm::OpenMPIRBuilder &OMPBuilder =
3930 CGM.getOpenMPRuntime().getOMPBuilder();
3931 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
3932 AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
3933 OMPBuilder.applyWorkshareLoop(
3934 Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier,
3935 SchedKind, ChunkSize, /*HasSimdModifier=*/false,
3936 /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false,
3937 /*HasOrderedClause=*/false);
3938 return;
3939 }
3940
3941 HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel());
3942 };
3943 {
3944 auto LPCRegion =
3945 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3946 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3947 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen,
3948 S.hasCancel());
3949 }
3950
3951 if (!UseOMPIRBuilder) {
3952 // Emit an implicit barrier at the end.
3953 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3954 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3955 }
3956 // Check for outer lastprivate conditional update.
3957 checkForLastprivateConditionalUpdate(*this, S);
3958}
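
// Note (illustrative): on the classic path above, 'nowait' suppresses the
// trailing barrier unless a lastprivate clause was present, in which case
// the barrier is still emitted.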
3959
3960void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
3961 bool HasLastprivates = false;
3962 auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
3963 PrePostActionTy &) {
3964 HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
3965 };
3966 {
3967 auto LPCRegion =
3968 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
3969 OMPLexicalScope Scope(*this, S, OMPD_unknown);
3970 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
3971 }
3972
3973 // Emit an implicit barrier at the end.
3974 if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates)
3975 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for);
3976 // Check for outer lastprivate conditional update.
3977 checkForLastprivateConditionalUpdate(*this, S);
3978}
3979
3980static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty,
3981 const Twine &Name,
3982 llvm::Value *Init = nullptr) {
3983 LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty);
3984 if (Init)
3985 CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true);
3986 return LVal;
3987}
3988
void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) {
  const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
  const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
  bool HasLastprivates = false;
  auto &&CodeGen = [&S, CapturedStmt, CS,
                    &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) {
    const ASTContext &C = CGF.getContext();
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    // Emit helper vars inits.
    LValue LB = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.lb.",
                                  CGF.Builder.getInt32(0));
    llvm::ConstantInt *GlobalUBVal = CS != nullptr
                                         ? CGF.Builder.getInt32(CS->size() - 1)
                                         : CGF.Builder.getInt32(0);
    LValue UB =
        createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.ub.", GlobalUBVal);
    LValue ST = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.st.",
                                  CGF.Builder.getInt32(1));
    LValue IL = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.il.",
                                  CGF.Builder.getInt32(0));
    // Loop counter.
    LValue IV = createSectionLVal(CGF, KmpInt32Ty, ".omp.sections.iv.");
    OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV);
    OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue);
    CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB);
    // Generate condition for loop.
    BinaryOperator *Cond = BinaryOperator::Create(
        C, &IVRefExpr, &UBRefExpr, BO_LE, C.BoolTy, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), FPOptionsOverride());
    // Increment for loop counter.
    UnaryOperator *Inc = UnaryOperator::Create(
        C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary,
        S.getBeginLoc(), true, FPOptionsOverride());
    auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) {
      // Iterate through all sections and emit a switch construct:
      // switch (IV) {
      // case 0:
      //   <SectionStmt[0]>;
      //   break;
      // ...
      // case <NumSection> - 1:
      //   <SectionStmt[<NumSection> - 1]>;
      //   break;
      // }
      // .omp.sections.exit:
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".omp.sections.exit");
      llvm::SwitchInst *SwitchStmt =
          CGF.Builder.CreateSwitch(CGF.EmitLoadOfScalar(IV, S.getBeginLoc()),
                                   ExitBB, CS == nullptr ? 1 : CS->size());
      if (CS) {
        unsigned CaseNumber = 0;
        for (const Stmt *SubStmt : CS->children()) {
          auto CaseBB = CGF.createBasicBlock(".omp.sections.case");
          CGF.EmitBlock(CaseBB);
          SwitchStmt->addCase(CGF.Builder.getInt32(CaseNumber), CaseBB);
          CGF.EmitStmt(SubStmt);
          CGF.EmitBranch(ExitBB);
          ++CaseNumber;
        }
      } else {
        llvm::BasicBlock *CaseBB = CGF.createBasicBlock(".omp.sections.case");
        CGF.EmitBlock(CaseBB);
        SwitchStmt->addCase(CGF.Builder.getInt32(0), CaseBB);
        CGF.EmitStmt(CapturedStmt);
        CGF.EmitBranch(ExitBB);
      }
      CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
    };

    CodeGenFunction::OMPPrivateScope LoopScope(CGF);
    if (CGF.EmitOMPFirstprivateClause(S, LoopScope)) {
      // Emit implicit barrier to synchronize threads and avoid data races on
      // initialization of firstprivate variables and post-update of
      // lastprivate variables.
      CGF.CGM.getOpenMPRuntime().emitBarrierCall(
          CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
          /*ForceSimpleCall=*/true);
    }
    CGF.EmitOMPPrivateClause(S, LoopScope);
    CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV);
    HasLastprivates = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
    CGF.EmitOMPReductionClauseInit(S, LoopScope);
    (void)LoopScope.Privatize();
    if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
      CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);

    // Emit static non-chunked loop.
    OpenMPScheduleTy ScheduleKind;
    ScheduleKind.Schedule = OMPC_SCHEDULE_static;
    CGOpenMPRuntime::StaticRTInput StaticInit(
        /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false,
        IL.getAddress(CGF), LB.getAddress(CGF), UB.getAddress(CGF),
        ST.getAddress(CGF));
    CGF.CGM.getOpenMPRuntime().emitForStaticInit(
        CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit);
    // UB = min(UB, GlobalUB);
    llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, S.getBeginLoc());
    llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpSLT(UBVal, GlobalUBVal), UBVal, GlobalUBVal);
    CGF.EmitStoreOfScalar(MinUBGlobalUB, UB);
    // IV = LB;
    CGF.EmitStoreOfScalar(CGF.EmitLoadOfScalar(LB, S.getBeginLoc()), IV);
    // while (idx <= UB) { BODY; ++idx; }
    CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen,
                         [](CodeGenFunction &) {});
    // Tell the runtime we are done.
    auto &&CodeGen = [&S](CodeGenFunction &CGF) {
      CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(),
                                                     OMPD_sections);
    };
    CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
    // Emit post-update of the reduction variables if IsLastIter != 0.
    emitPostUpdateForReductionClause(CGF, S, [IL, &S](CodeGenFunction &CGF) {
      return CGF.Builder.CreateIsNotNull(
          CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
    });

    // Emit final copy of the lastprivate variables if IsLastIter != 0.
    if (HasLastprivates)
      CGF.EmitOMPLastprivateClauseFinal(
          S, /*NoFinals=*/false,
          CGF.Builder.CreateIsNotNull(
              CGF.EmitLoadOfScalar(IL, S.getBeginLoc())));
  };

  bool HasCancel = false;
  if (auto *OSD = dyn_cast<OMPSectionsDirective>(&S))
    HasCancel = OSD->hasCancel();
  else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&S))
    HasCancel = OPSD->hasCancel();
  OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel);
  CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen,
                                              HasCancel);
  // Emit barrier for lastprivates only if 'sections' directive has 'nowait'
  // clause. Otherwise the barrier will be generated by the codegen for the
  // directive.
  if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) {
    // Emit implicit barrier to synchronize threads and avoid data races on
    // initialization of firstprivate variables.
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_unknown);
  }
}

void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
    using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy;

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    const CapturedStmt *ICS = S.getInnermostCapturedStmt();
    const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt();
    const auto *CS = dyn_cast<CompoundStmt>(CapturedStmt);
    llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector;
    if (CS) {
      for (const Stmt *SubStmt : CS->children()) {
        auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP,
                                         InsertPointTy CodeGenIP) {
          OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
              *this, SubStmt, AllocaIP, CodeGenIP, "section");
        };
        SectionCBVector.push_back(SectionCB);
      }
    } else {
      auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP,
                                            InsertPointTy CodeGenIP) {
        OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
            *this, CapturedStmt, AllocaIP, CodeGenIP, "section");
      };
      SectionCBVector.push_back(SectionCB);
    }

    // Privatization callback that performs appropriate action for
    // shared/private/firstprivate/lastprivate/copyin/... variables.
    //
    // TODO: This defaults to shared right now.
    auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                     llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) {
      // The next line is appropriate only for variables (Val) with the
      // data-sharing attribute "shared".
      ReplVal = &Val;

      return CodeGenIP;
    };

    CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP);
    CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        AllocaInsertPt->getParent(), AllocaInsertPt->getIterator());
    Builder.restoreIP(OMPBuilder.createSections(
        Builder, AllocaIP, SectionCBVector, PrivCB, FiniCB, S.hasCancel(),
        S.getSingleClause<OMPNowaitClause>()));
    return;
  }
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    EmitSections(S);
  }
  // Emit an implicit barrier at the end.
  if (!S.getSingleClause<OMPNowaitClause>()) {
    CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(),
                                           OMPD_sections);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt();
    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                   InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, SectionRegionBodyStmt, AllocaIP, CodeGenIP, "section");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createSection(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  EmitStmt(S.getAssociatedStmt());
}

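// For reference (illustrative only): a 'single' region with 'copyprivate',
// such as
//   #pragma omp single copyprivate(x)
//   x = init();
// is emitted by emitSingleRegion below roughly as
//   did_it = 0;
//   if (__kmpc_single(&loc, tid)) {
//     x = init(); did_it = 1;
//     __kmpc_end_single(&loc, tid);
//   }
//   __kmpc_copyprivate(&loc, tid, sizeof(x), &x, &copy_fn, did_it);
// where copy_fn performs the <destination> = <source> assignments collected
// from the clause.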
void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) {
  llvm::SmallVector<const Expr *, 8> CopyprivateVars;
  llvm::SmallVector<const Expr *, 8> DestExprs;
  llvm::SmallVector<const Expr *, 8> SrcExprs;
  llvm::SmallVector<const Expr *, 8> AssignmentOps;
  // Check if there are any 'copyprivate' clauses associated with this
  // 'single' construct.
  // Build a list of copyprivate variables along with helper expressions
  // (<source>, <destination>, <destination> = <source> expressions).
  for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) {
    CopyprivateVars.append(C->varlists().begin(), C->varlists().end());
    DestExprs.append(C->destination_exprs().begin(),
                     C->destination_exprs().end());
    SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end());
    AssignmentOps.append(C->assignment_ops().begin(),
                         C->assignment_ops().end());
  }
  // Emit code for 'single' region along with 'copyprivate' clauses.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope SingleScope(CGF);
    (void)CGF.EmitOMPFirstprivateClause(S, SingleScope);
    CGF.EmitOMPPrivateClause(S, SingleScope);
    (void)SingleScope.Privatize();
    CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    OMPLexicalScope Scope(*this, S, OMPD_unknown);
    CGM.getOpenMPRuntime().emitSingleRegion(*this, CodeGen, S.getBeginLoc(),
                                            CopyprivateVars, DestExprs,
                                            SrcExprs, AssignmentOps);
  }
  // Emit an implicit barrier at the end of the 'single' region (to avoid data
  // races on firstprivate init), unless a 'nowait' clause was specified or the
  // synchronization is already handled by a 'copyprivate' clause.
  if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) {
    CGM.getOpenMPRuntime().emitBarrierCall(
        *this, S.getBeginLoc(),
        S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

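// For reference (illustrative only): emitMasterRegion lowers the region to
//   if (__kmpc_master(&loc, tid)) {
//     <body>;
//     __kmpc_end_master(&loc, tid);
//   }
// so only the primary thread of the team executes the body, with no implied
// barrier.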
static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, CodeGen, S.getBeginLoc());
}

void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt();

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MasterRegionBodyStmt, AllocaIP, CodeGenIP, "master");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createMaster(Builder, BodyGenCB, FiniCB));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMaster(*this, S);
}

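// For reference (illustrative only): emitMaskedRegion lowers the region to
//   if (__kmpc_masked(&loc, tid, filter)) {
//     <body>;
//     __kmpc_end_masked(&loc, tid);
//   }
// where 'filter' is the thread id from the 'filter' clause (0 if absent).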
static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getRawStmt());
  };
  Expr *Filter = nullptr;
  if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
    Filter = FilterClause->getThreadID();
  CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, CodeGen, S.getBeginLoc(),
                                              Filter);
}

void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Filter = nullptr;
    if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>())
      Filter = FilterClause->getThreadID();
    llvm::Value *FilterVal = Filter
                                 ? EmitScalarExpr(Filter, CGM.Int32Ty)
                                 : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                  InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, MaskedRegionBodyStmt, AllocaIP, CodeGenIP, "masked");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(
        OMPBuilder.createMasked(Builder, BodyGenCB, FiniCB, FilterVal));

    return;
  }
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  emitMasked(*this, S);
}

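// For reference (illustrative only): a 'critical' region named <name> is
// serialized via a global lock variable, roughly
//   __kmpc_critical(&loc, tid, &.gomp_critical_user_<name>.var);
//   <body>;
//   __kmpc_end_critical(&loc, tid, &.gomp_critical_user_<name>.var);
// with __kmpc_critical_with_hint used instead when a 'hint' clause is present.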
void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) {
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
    using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

    const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt();
    const Expr *Hint = nullptr;
    if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
      Hint = HintClause->getHint();

    // TODO: This is slightly different from what's currently being done in
    // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything
    // about typing is final.
    llvm::Value *HintInst = nullptr;
    if (Hint)
      HintInst =
          Builder.CreateIntCast(EmitScalarExpr(Hint), CGM.Int32Ty, false);

    auto FiniCB = [this](InsertPointTy IP) {
      OMPBuilderCBHelpers::FinalizeOMPRegion(*this, IP);
    };

    auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP,
                                                    InsertPointTy CodeGenIP) {
      OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
          *this, CriticalRegionBodyStmt, AllocaIP, CodeGenIP, "critical");
    };

    LexicalScope Scope(*this, S.getSourceRange());
    EmitStopPoint(&S);
    Builder.restoreIP(OMPBuilder.createCritical(
        Builder, BodyGenCB, FiniCB, S.getDirectiveName().getAsString(),
        HintInst));

    return;
  }

  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CGF.EmitStmt(S.getAssociatedStmt());
  };
  const Expr *Hint = nullptr;
  if (const auto *HintClause = S.getSingleClause<OMPHintClause>())
    Hint = HintClause->getHint();
  LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(&S);
  CGM.getOpenMPRuntime().emitCriticalRegion(*this,
                                            S.getDirectiveName().getAsString(),
                                            CodeGen, S.getBeginLoc(), Hint);
}

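// For reference (illustrative only): with the default static schedule,
//   #pragma omp parallel for
//   for (...) ...
// is emitted as an outlined function passed to
//   __kmpc_fork_call(&loc, /*argc=*/..., .omp_outlined., <captures>...);
// whose body contains the __kmpc_for_static_init_4 / __kmpc_for_static_fini
// worksharing loop produced by emitWorksharingDirective.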
void CodeGenFunction::EmitOMPParallelForDirective(
    const OMPParallelForDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, S.hasCancel());
  };
  {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                                 [](const OMPReductionClause *C) {
                                   return C->getModifier() ==
                                          OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelForSimdDirective(
    const OMPParallelForSimdDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'for simd' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
  };
  {
    const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) {
      CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF);
      CGCapturedStmtInfo CGSI(CR_OpenMP);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI);
      OMPLoopScope LoopScope(CGF, S);
      return CGF.EmitScalarExpr(S.getNumIterations());
    };
    bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(),
                                 [](const OMPReductionClause *C) {
                                   return C->getModifier() ==
                                          OMPC_REDUCTION_inscan;
                                 });
    if (IsInscan)
      emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen);
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen,
                                   emitEmptyBoundParameters);
    if (IsInscan)
      emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMasterDirective(
    const OMPParallelMasterDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'master' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMaster(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelMaskedDirective(
    const OMPParallelMaskedDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'masked' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    OMPPrivateScope PrivateScope(CGF);
    emitOMPCopyinClause(CGF, S);
    (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
    CGF.EmitOMPPrivateClause(S, PrivateScope);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    emitMasked(CGF, S);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen,
                                   emitEmptyBoundParameters);
    emitPostUpdateForReductionClause(*this, S,
                                     [](CodeGenFunction &) { return nullptr; });
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

void CodeGenFunction::EmitOMPParallelSectionsDirective(
    const OMPParallelSectionsDirective &S) {
  // Emit directive as a combined directive that consists of two implicit
  // directives: 'parallel' with 'sections' directive.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    emitOMPCopyinClause(CGF, S);
    CGF.EmitSections(S);
  };
  {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
    emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
                                   emitEmptyBoundParameters);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, S);
}

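// For reference (illustrative only): the visitor below exists because an
// untied task may resume on a different thread after a task scheduling point,
// e.g.
//   #pragma omp task untied
//   { int X = f(); g(); /* may switch threads here */ use(X); }
// so locals such as X must live in the task's private storage rather than on
// any particular thread's stack.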
namespace {
/// Get the list of variables declared in the context of the untied tasks.
class CheckVarsEscapingUntiedTaskDeclContext final
    : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> {
  llvm::SmallVector<const VarDecl *, 4> PrivateDecls;

public:
  explicit CheckVarsEscapingUntiedTaskDeclContext() = default;
  virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default;
  void VisitDeclStmt(const DeclStmt *S) {
    if (!S)
      return;
    // Need to privatize only local vars, static locals can be processed as is.
    for (const Decl *D : S->decls()) {
      if (const auto *VD = dyn_cast_or_null<VarDecl>(D))
        if (VD->hasLocalStorage())
          PrivateDecls.push_back(VD);
    }
  }
  void VisitOMPExecutableDirective(const OMPExecutableDirective *) {}
  void VisitCapturedStmt(const CapturedStmt *) {}
  void VisitLambdaExpr(const LambdaExpr *) {}
  void VisitBlockExpr(const BlockExpr *) {}
  void VisitStmt(const Stmt *S) {
    if (!S)
      return;
    for (const Stmt *Child : S->children())
      if (Child)
        Visit(Child);
  }

  /// Returns the list of privatized local variables.
  ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; }
};
} // anonymous namespace

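// For example (illustrative): given
//   #pragma omp task depend(in : a) depend(out : b) \
//                    depend(inout : omp_all_memory)
// buildDependences emits a single 'outallmemory' entry first and then skips
// the 'out' dependence on 'b', since depending on all of memory already
// subsumes 'out'/'inout' dependences; the 'in' dependence on 'a' is kept.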
static void buildDependences(const OMPExecutableDirective &S,
                             OMPTaskDataTy &Data) {

  // First look for 'omp_all_memory' and add this first.
  bool OmpAllMemory = false;
  if (llvm::any_of(
          S.getClausesOfKind<OMPDependClause>(), [](const OMPDependClause *C) {
            return C->getDependencyKind() == OMPC_DEPEND_outallmemory ||
                   C->getDependencyKind() == OMPC_DEPEND_inoutallmemory;
          })) {
    OmpAllMemory = true;
    // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are
    // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to
    // simplify.
    OMPTaskDataTy::DependData &DD =
        Data.Dependences.emplace_back(OMPC_DEPEND_outallmemory,
                                      /*IteratorExpr=*/nullptr);
    // Add a nullptr Expr to simplify the codegen in emitDependData.
    DD.DepExprs.push_back(nullptr);
  }
  // Add remaining dependences skipping any 'out' or 'inout' if they are
  // overridden by 'omp_all_memory'.
  for (const auto *C : S.getClausesOfKind<OMPDependClause>()) {
    OpenMPDependClauseKind Kind = C->getDependencyKind();
    if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory)
      continue;
    if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout))
      continue;
    OMPTaskDataTy::DependData &DD = Data.Dependences.emplace_back(
        C->getDependencyKind(), C->getModifier());
    DD.DepExprs.append(C->varlist_begin(), C->varlist_end());
  }
}

void CodeGenFunction::EmitOMPTaskBasedDirective(
    const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion,
    const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen,
    OMPTaskDataTy &Data) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion);
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  // Check if the task is final.
  if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) {
    // If the condition constant folds and can be elided, try to avoid emitting
    // the condition and the dead arm of the if/else.
    const Expr *Cond = Clause->getCondition();
    bool CondConstant;
    if (ConstantFoldsToSimpleInteger(Cond, CondConstant))
      Data.Final.setInt(CondConstant);
    else
      Data.Final.setPointer(EvaluateExprAsBool(Cond));
  } else {
    // By default the task is not final.
    Data.Final.setInt(/*IntVal=*/false);
  }
  // Check if the task has 'priority' clause.
  if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) {
    const Expr *Prio = Clause->getPriority();
    Data.Priority.setInt(/*IntVal=*/true);
    Data.Priority.setPointer(EmitScalarConversion(
        EmitScalarExpr(Prio), Prio->getType(),
        getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1),
        Prio->getExprLoc()));
  }
  // The first function argument for tasks is a thread id, the second one is a
  // part id (0 for tied tasks, >= 0 for untied tasks).
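  // Conceptually the outlined task function has the form (illustrative
  // sketch, assuming the usual kmp_tasking convention; the remaining
  // parameters carry the privates and the copy function):
  //   void .omp_task_entry.(kmp_int32 gtid, kmp_int32 part_id,
  //                         void *privates, void *copy_fn, kmp_task_t *task);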
  llvm::DenseSet<const VarDecl *> EmittedAsPrivate;
  // Get list of private variables.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    auto IRef = C->varlist_begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.PrivateVars.push_back(*IRef);
        Data.PrivateCopies.push_back(IInit);
      }
      ++IRef;
    }
  }
  EmittedAsPrivate.clear();
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.FirstprivateVars.push_back(*IRef);
        Data.FirstprivateCopies.push_back(IInit);
        Data.FirstprivateInits.push_back(*IElemInitRef);
      }
      ++IRef;
      ++IElemInitRef;
    }
  }
  // Get list of lastprivate variables (for taskloops).
  llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs;
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto ID = C->destination_exprs().begin();
    for (const Expr *IInit : C->private_copies()) {
      const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl());
      if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) {
        Data.LastprivateVars.push_back(*IRef);
        Data.LastprivateCopies.push_back(IInit);
      }
      LastprivateDstsOrigs.insert(
          std::make_pair(cast<VarDecl>(cast<DeclRefExpr>(*ID)->getDecl()),
                         cast<DeclRefExpr>(*IRef)));
      ++IRef;
      ++ID;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit(
      *this, S.getBeginLoc(), LHSs, RHSs, Data);
  // Build list of dependences.
  buildDependences(S, Data);
  // Get list of local vars for untied tasks.
  if (!Data.Tied) {
    CheckVarsEscapingUntiedTaskDeclContext Checker;
    Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt());
    Data.PrivateLocals.append(Checker.getPrivateDecls().begin(),
                              Checker.getPrivateDecls().end());
  }
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs,
                    CapturedRegion](CodeGenFunction &CGF,
                                    PrePostActionTy &Action) {
    llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                    std::pair<Address, Address>>
        UntiedLocalVars;
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    // Generate debug info for variables present in shared clause.
    if (auto *DI = CGF.getDebugInfo()) {
      llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields =
          CGF.CapturedStmtInfo->getCaptureFields();
      llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue();
      if (CaptureFields.size() && ContextValue) {
        unsigned CharWidth = CGF.getContext().getCharWidth();
        // The shared variables are packed together as members of a structure,
        // so the address of each shared variable can be computed by adding its
        // offset within the record to the base address of the record. For each
        // shared variable, the debug intrinsic llvm.dbg.declare is generated
        // with an appropriate expression (DIExpression).
        // Ex:
        //  %12 = load %struct.anon*, %struct.anon** %__context.addr.i
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar1,
        //             metadata !DIExpression(DW_OP_deref))
        //  call void @llvm.dbg.declare(metadata %struct.anon* %12,
        //             metadata !svar2,
        //             metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref))
        for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) {
          const VarDecl *SharedVar = It->first;
          RecordDecl *CaptureRecord = It->second->getParent();
          const ASTRecordLayout &Layout =
              CGF.getContext().getASTRecordLayout(CaptureRecord);
          unsigned Offset =
              Layout.getFieldOffset(It->second->getFieldIndex()) / CharWidth;
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(SharedVar, ContextValue,
                                                CGF.Builder, false);
          // Get the call dbg.declare instruction we just created and update
          // its DIExpression to add offset to base address.
          auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare,
                               unsigned Offset) {
            SmallVector<uint64_t, 8> Ops;
            // Add offset to the base address if non zero.
            if (Offset) {
              Ops.push_back(llvm::dwarf::DW_OP_plus_uconst);
              Ops.push_back(Offset);
            }
            Ops.push_back(llvm::dwarf::DW_OP_deref);
            Declare->setExpression(llvm::DIExpression::get(Ctx, Ops));
          };
          llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back();
          if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(&Last))
            UpdateExpr(DDI->getContext(), DDI, Offset);
          // If we're emitting using the new debug info format into a block
          // without a terminator, the record will be "trailing".
          assert(!Last.isTerminator() && "unexpected terminator");
          if (auto *Marker =
                  CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) {
            for (llvm::DbgVariableRecord &DVR : llvm::reverse(
                     llvm::filterDbgVars(Marker->getDbgRecordRange()))) {
              UpdateExpr(Last.getContext(), &DVR, Offset);
              break;
            }
          }
        }
      }
    }
    llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs;
    if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() ||
        !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.PrivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(E->getType()), ".priv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        FirstprivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const Expr *E : Data.LastprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".lastpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      for (const VarDecl *VD : Data.PrivateLocals) {
        QualType Ty = VD->getType().getNonReferenceType();
        if (VD->getType()->isLValueReferenceType())
          Ty = CGF.getContext().getPointerType(Ty);
        if (isAllocatableDecl(VD))
          Ty = CGF.getContext().getPointerType(Ty);
        RawAddress PrivatePtr = CGF.CreateMemTemp(
            CGF.getContext().getPointerType(Ty), ".local.ptr.addr");
        auto Result = UntiedLocalVars.insert(
            std::make_pair(VD, std::make_pair(PrivatePtr, Address::invalid())));
        // If key exists update in place.
        if (!Result.second)
          *Result.first = std::make_pair(
              VD, std::make_pair(PrivatePtr, Address::invalid()));
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : LastprivateDstsOrigs) {
        const auto *OrigVD = cast<VarDecl>(Pair.second->getDecl());
        DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD),
                        /*RefersToEnclosingVariableOrCapture=*/
                        CGF.CapturedStmtInfo->lookup(OrigVD) != nullptr,
                        Pair.second->getType(), VK_LValue,
                        Pair.second->getExprLoc());
        Scope.addPrivate(Pair.first, CGF.EmitLValue(&DRE).getAddress(CGF));
      }
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement = Address(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
        if (auto *DI = CGF.getDebugInfo())
          if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo())
            (void)DI->EmitDeclareOfAutoVariable(
                Pair.first, Pair.second.getBasePointer(), CGF.Builder,
                /*UsePointerValue*/ true);
      }
      // Adjust mapping for internal locals by mapping actual memory instead of
      // a pointer to this memory.
      for (auto &Pair : UntiedLocalVars) {
        QualType VDType = Pair.first->getType().getNonReferenceType();
        if (Pair.first->getType()->isLValueReferenceType())
          VDType = CGF.getContext().getPointerType(VDType);
        if (isAllocatableDecl(Pair.first)) {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(
              Ptr,
              CGF.ConvertTypeForMem(CGF.getContext().getPointerType(VDType)),
              CGF.getPointerAlign());
          Pair.second.first = Replacement;
          Ptr = CGF.Builder.CreateLoad(Replacement);
          Replacement = Address(Ptr, CGF.ConvertTypeForMem(VDType),
                                CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.second = Replacement;
        } else {
          llvm::Value *Ptr = CGF.Builder.CreateLoad(Pair.second.first);
          Address Replacement(Ptr, CGF.ConvertTypeForMem(VDType),
                              CGF.getContext().getDeclAlign(Pair.first));
          Pair.second.first = Replacement;
        }
      }
    }
    if (Data.Reductions) {
      OMPPrivateScope FirstprivateScope(CGF);
      for (const auto &Pair : FirstprivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        FirstprivateScope.addPrivate(Pair.first, Replacement);
      }
      (void)FirstprivateScope.Privatize();
      OMPLexicalScope LexScope(CGF, S, CapturedRegion);
      ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                             Data.ReductionCopies, Data.ReductionOps);
      llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(9)));
      for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
                                     CGF.getContext().VoidPtrTy,
                                     CGF.getContext().getPointerType(
                                         Data.ReductionCopies[Cnt]->getType()),
                                     Data.ReductionCopies[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    // Privatize all private variables except for in_reduction items.
    (void)Scope.Privatize();
    SmallVector<const Expr *, 4> InRedVars;
    SmallVector<const Expr *, 4> InRedPrivs;
    SmallVector<const Expr *, 4> InRedOps;
    SmallVector<const Expr *, 4> TaskgroupDescriptors;
    for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
      auto IPriv = C->privates().begin();
      auto IRed = C->reduction_ops().begin();
      auto ITD = C->taskgroup_descriptors().begin();
      for (const Expr *Ref : C->varlists()) {
        InRedVars.emplace_back(Ref);
        InRedPrivs.emplace_back(*IPriv);
        InRedOps.emplace_back(*IRed);
        TaskgroupDescriptors.emplace_back(*ITD);
        std::advance(IPriv, 1);
        std::advance(IRed, 1);
        std::advance(ITD, 1);
      }
    }
    // Privatize in_reduction items here, because taskgroup descriptors must be
    // privatized earlier.
    OMPPrivateScope InRedScope(CGF);
    if (!InRedVars.empty()) {
      ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
      for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
        RedCG.emitSharedOrigLValue(CGF, Cnt);
        RedCG.emitAggregateType(CGF, Cnt);
        // The taskgroup descriptor variable is always implicit firstprivate
        // and privatized already during processing of the firstprivates.
        // FIXME: This must be removed once the runtime library is fixed.
        // Emit required threadprivate variables for
        // initializer/combiner/finalizer.
        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                           RedCG, Cnt);
        llvm::Value *ReductionsPtr;
        if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
          ReductionsPtr = CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr),
                                               TRExpr->getExprLoc());
        } else {
          ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
        }
        Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
            CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
        Replacement = Address(
            CGF.EmitScalarConversion(
                Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
                CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
                InRedPrivs[Cnt]->getExprLoc()),
            CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
            Replacement.getAlignment());
        Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
        InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
      }
    }
    (void)InRedScope.Privatize();

    CGOpenMPRuntime::UntiedTaskLocalDeclsRAII LocalVarsScope(CGF,
                                                             UntiedLocalVars);
    Action.Enter(CGF);
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied,
      Data.NumberOfParts);
  OMPLexicalScope Scope(*this, S, std::nullopt,
                        !isOpenMPParallelDirective(S.getDirectiveKind()) &&
                            !isOpenMPSimdDirective(S.getDirectiveKind()));
  TaskGen(*this, OutlinedFn, Data);
}

static ImplicitParamDecl *
createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data,
                                  QualType Ty, CapturedDecl *CD,
                                  SourceLocation Loc) {
  auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                           ImplicitParamKind::Other);
  auto *OrigRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty,
                                              ImplicitParamKind::Other);
  auto *PrivateRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue);
  QualType ElemType = C.getBaseElementType(Ty);
  auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType,
                                           ImplicitParamKind::Other);
  auto *InitRef = DeclRefExpr::Create(
      C, NestedNameSpecifierLoc(), SourceLocation(), InitVD,
      /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue);
  PrivateVD->setInitStyle(VarDecl::CInit);
  PrivateVD->setInit(ImplicitCastExpr::Create(C, ElemType, CK_LValueToRValue,
                                              InitRef, /*BasePath=*/nullptr,
                                              VK_PRValue, FPOptionsOverride()));
  Data.FirstprivateVars.emplace_back(OrigRef);
  Data.FirstprivateCopies.emplace_back(PrivateRef);
  Data.FirstprivateInits.emplace_back(InitRef);
  return OrigVD;
}

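// In EmitOMPTargetTaskBasedDirective below, the helper above turns the offload
// arrays (base pointers, pointers, sizes and, when present, mappers) into
// implicit firstprivates of the generated task, conceptually (names are
// illustrative):
//   void *.bp.[N]; void *.p.[N]; int64_t .sz.[N]; // firstprivate on the task
// so the task body can address private copies of these arrays instead of the
// enclosing function's locals.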
void CodeGenFunction::EmitOMPTargetTaskBasedDirective(
    const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen,
    OMPTargetDataInfo &InputInfo) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  auto I = CS->getCapturedDecl()->param_begin();
  auto PartId = std::next(I);
  auto TaskT = std::next(I, 4);
  OMPTaskDataTy Data;
  // The task is not final.
  Data.Final.setInt(/*IntVal=*/false);
  // Get list of firstprivate variables.
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    auto IRef = C->varlist_begin();
    auto IElemInitRef = C->inits().begin();
    for (auto *IInit : C->private_copies()) {
      Data.FirstprivateVars.push_back(*IRef);
      Data.FirstprivateCopies.push_back(IInit);
      Data.FirstprivateInits.push_back(*IElemInitRef);
      ++IRef;
      ++IElemInitRef;
    }
  }
  SmallVector<const Expr *, 4> LHSs;
  SmallVector<const Expr *, 4> RHSs;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
    Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
    Data.ReductionOps.append(C->reduction_ops().begin(),
                             C->reduction_ops().end());
    LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
    RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
  }
  OMPPrivateScope TargetScope(*this);
  VarDecl *BPVD = nullptr;
  VarDecl *PVD = nullptr;
  VarDecl *SVD = nullptr;
  VarDecl *MVD = nullptr;
  if (InputInfo.NumberOfTargetItems > 0) {
    auto *CD = CapturedDecl::Create(
        getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0);
    llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems);
    QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType(
        getContext().VoidPtrTy, ArrSize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    BPVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    PVD = createImplicitFirstprivateForType(
        getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
    QualType SizesType = getContext().getConstantArrayType(
        getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1),
        ArrSize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD,
                                            S.getBeginLoc());
    TargetScope.addPrivate(BPVD, InputInfo.BasePointersArray);
    TargetScope.addPrivate(PVD, InputInfo.PointersArray);
    TargetScope.addPrivate(SVD, InputInfo.SizesArray);
    // If there is no user-defined mapper, the mapper array will be nullptr. In
    // this case, we don't need to privatize it.
    if (!isa_and_nonnull<llvm::ConstantPointerNull>(
            InputInfo.MappersArray.emitRawPointer(*this))) {
      MVD = createImplicitFirstprivateForType(
          getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc());
      TargetScope.addPrivate(MVD, InputInfo.MappersArray);
    }
  }
  (void)TargetScope.Privatize();
  buildDependences(S, Data);
  auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD,
                    &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) {
    // Set proper addresses for generated private copies.
    OMPPrivateScope Scope(CGF);
    if (!Data.FirstprivateVars.empty()) {
      enum { PrivatesParam = 2, CopyFnParam = 3 };
      llvm::Value *CopyFn = CGF.Builder.CreateLoad(
          CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(CopyFnParam)));
      llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(
          CS->getCapturedDecl()->getParam(PrivatesParam)));
      // Map privates.
      llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs;
      llvm::SmallVector<llvm::Value *, 16> CallArgs;
      llvm::SmallVector<llvm::Type *, 4> ParamTypes;
      CallArgs.push_back(PrivatesPtr);
      ParamTypes.push_back(PrivatesPtr->getType());
      for (const Expr *E : Data.FirstprivateVars) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
        RawAddress PrivatePtr =
            CGF.CreateMemTemp(CGF.getContext().getPointerType(E->getType()),
                              ".firstpriv.ptr.addr");
        PrivatePtrs.emplace_back(VD, PrivatePtr);
        CallArgs.push_back(PrivatePtr.getPointer());
        ParamTypes.push_back(PrivatePtr.getType());
      }
      auto *CopyFnTy = llvm::FunctionType::get(CGF.Builder.getVoidTy(),
                                               ParamTypes, /*isVarArg=*/false);
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(
          CGF, S.getBeginLoc(), {CopyFnTy, CopyFn}, CallArgs);
      for (const auto &Pair : PrivatePtrs) {
        Address Replacement(
            CGF.Builder.CreateLoad(Pair.second),
            CGF.ConvertTypeForMem(Pair.first->getType().getNonReferenceType()),
            CGF.getContext().getDeclAlign(Pair.first));
        Scope.addPrivate(Pair.first, Replacement);
      }
    }
    CGF.processInReduction(S, Data, CGF, CS, Scope);
    if (InputInfo.NumberOfTargetItems > 0) {
      InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(BPVD), /*Index=*/0);
      InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(PVD), /*Index=*/0);
      InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP(
          CGF.GetAddrOfLocalVar(SVD), /*Index=*/0);
      // If MVD is nullptr, the mapper array is not privatized.
      if (MVD)
        InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP(
            CGF.GetAddrOfLocalVar(MVD), /*Index=*/0);
    }

    Action.Enter(CGF);
    OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false);
    auto *TL = S.getSingleClause<OMPThreadLimitClause>();
    if (CGF.CGM.getLangOpts().OpenMP >= 51 &&
        needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) {
      // Emit __kmpc_set_thread_limit() to set the thread_limit for the task
      // enclosing this target region. This will indirectly set the
      // thread_limit for every applicable construct within the target region.
      CGF.CGM.getOpenMPRuntime().emitThreadLimitClause(
          CGF, TL->getThreadLimit(), S.getBeginLoc());
    }
    BodyGen(CGF);
  };
  llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction(
      S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true,
      Data.NumberOfParts);
  llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0);
  IntegerLiteral IfCond(getContext(), TrueOrFalse,
                        getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
                        SourceLocation());
  CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn,
                                      SharedsTy, CapturedStruct, &IfCond, Data);
}

void CodeGenFunction::processInReduction(const OMPExecutableDirective &S,
                                         OMPTaskDataTy &Data,
                                         CodeGenFunction &CGF,
                                         const CapturedStmt *CS,
                                         OMPPrivateScope &Scope) {
  if (Data.Reductions) {
    OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind();
    OMPLexicalScope LexScope(CGF, S, CapturedRegion);
    ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars,
                           Data.ReductionCopies, Data.ReductionOps);
    llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad(
        CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(4)));
    for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      Replacement = Address(
          CGF.EmitScalarConversion(Replacement.emitRawPointer(CGF),
                                   CGF.getContext().VoidPtrTy,
                                   CGF.getContext().getPointerType(
                                       Data.ReductionCopies[Cnt]->getType()),
                                   Data.ReductionCopies[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(Data.ReductionCopies[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      Scope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)Scope.Privatize();
  SmallVector<const Expr *, 4> InRedVars;
  SmallVector<const Expr *, 4> InRedPrivs;
  SmallVector<const Expr *, 4> InRedOps;
  SmallVector<const Expr *, 4> TaskgroupDescriptors;
  for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) {
    auto IPriv = C->privates().begin();
    auto IRed = C->reduction_ops().begin();
    auto ITD = C->taskgroup_descriptors().begin();
    for (const Expr *Ref : C->varlists()) {
      InRedVars.emplace_back(Ref);
      InRedPrivs.emplace_back(*IPriv);
      InRedOps.emplace_back(*IRed);
      TaskgroupDescriptors.emplace_back(*ITD);
      std::advance(IPriv, 1);
      std::advance(IRed, 1);
      std::advance(ITD, 1);
    }
  }
  OMPPrivateScope InRedScope(CGF);
  if (!InRedVars.empty()) {
    ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps);
    for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) {
      RedCG.emitSharedOrigLValue(CGF, Cnt);
      RedCG.emitAggregateType(CGF, Cnt);
      // FIXME: This must be removed once the runtime library is fixed.
      // Emit required threadprivate variables for
      // initializer/combiner/finalizer.
      CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getBeginLoc(),
                                                         RedCG, Cnt);
      llvm::Value *ReductionsPtr;
      if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) {
        ReductionsPtr =
            CGF.EmitLoadOfScalar(CGF.EmitLValue(TRExpr), TRExpr->getExprLoc());
      } else {
        ReductionsPtr = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      }
      Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
          CGF, S.getBeginLoc(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
      Replacement = Address(
          CGF.EmitScalarConversion(
              Replacement.emitRawPointer(CGF), CGF.getContext().VoidPtrTy,
              CGF.getContext().getPointerType(InRedPrivs[Cnt]->getType()),
              InRedPrivs[Cnt]->getExprLoc()),
          CGF.ConvertTypeForMem(InRedPrivs[Cnt]->getType()),
          Replacement.getAlignment());
      Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
      InRedScope.addPrivate(RedCG.getBaseDecl(Cnt), Replacement);
    }
  }
  (void)InRedScope.Privatize();
}

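// For reference (illustrative only): a tied task without an 'if' clause is
// emitted by emitTaskCall roughly as
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, tid, flags, sizeof_task,
//                                         sizeof_shareds, &.omp_task_entry.);
//   ... copy shareds and firstprivates into *t ...
//   __kmpc_omp_task(&loc, tid, t);
// When the 'if' condition evaluates to false, the task body instead runs
// immediately between __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0.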
void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) {
  // Emit outlined function for task construct.
  const CapturedStmt *CS = S.getCapturedStmt(OMPD_task);
  Address CapturedStruct = GenerateCapturedStmtArgument(*CS);
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
  const Expr *IfCond = nullptr;
  for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
    if (C->getNameModifier() == OMPD_unknown ||
        C->getNameModifier() == OMPD_task) {
      IfCond = C->getCondition();
      break;
    }
  }

  OMPTaskDataTy Data;
  // Check if we should emit tied or untied task.
  Data.Tied = !S.getSingleClause<OMPUntiedClause>();
  auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitStmt(CS->getCapturedStmt());
  };
  auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
                    IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
                            const OMPTaskDataTy &Data) {
    CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, S.getBeginLoc(), S, OutlinedFn,
                                            SharedsTy, CapturedStruct, IfCond,
                                            Data);
  };
  auto LPCRegion =
      CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
  EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data);
}
5303
5304void CodeGenFunction::EmitOMPTaskyieldDirective(
5305 const OMPTaskyieldDirective &S) {
5306 CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc());
5307}
5308
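// Illustrative example (not from this source) of what the error directive
// lowering below handles:
//   #pragma omp error at(execution) severity(fatal) message("bad state")
// The message expression and the fatal flag (fatal when no severity clause
// is given) are forwarded to the runtime's error entry point.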
5309void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) {
5310 const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>();
5311 Expr *ME = MC ? MC->getMessageString() : nullptr;
5312 const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>();
5313 bool IsFatal = false;
5314 if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal)
5315 IsFatal = true;
5316 CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal);
5317}
5318
5319void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) {
5320 CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier);
5321}
5322
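// Both of the following reach the lowering below (illustrative only):
//   #pragma omp taskwait
//   #pragma omp taskwait depend(in: x) nowait
// Any depend items and the nowait clause are recorded in Data and passed to
// the runtime call.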
5323void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) {
5324 OMPTaskDataTy Data;
5325 // Build the list of dependences.
5326 buildDependences(S, Data);
5327 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
5328 CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data);
5329}
5330
5331static bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) {
5332 return T.clauses().empty();
5333}
5334
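// A taskgroup with a reduction, e.g. (illustrative only)
//   #pragma omp taskgroup task_reduction(+: sum)
// allocates a reduction descriptor below that participating tasks later look
// up through their in_reduction clauses; a clause-free taskgroup can instead
// be emitted directly via the OpenMPIRBuilder.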
5335void CodeGenFunction::EmitOMPTaskgroupDirective(
5336 const OMPTaskgroupDirective &S) {
5337 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5338 if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) {
5339 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5340 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5341 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5342 AllocaInsertPt->getIterator());
5343
5344 auto BodyGenCB = [&, this](InsertPointTy AllocaIP,
5345 InsertPointTy CodeGenIP) {
5346 Builder.restoreIP(IP: CodeGenIP);
5347 EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5348 };
5349 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5350 if (!CapturedStmtInfo)
5351 CapturedStmtInfo = &CapStmtInfo;
5352 Builder.restoreIP(IP: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB));
5353 return;
5354 }
5355 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
5356 Action.Enter(CGF);
5357 if (const Expr *E = S.getReductionRef()) {
5358 SmallVector<const Expr *, 4> LHSs;
5359 SmallVector<const Expr *, 4> RHSs;
5360 OMPTaskDataTy Data;
5361 for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) {
5362 Data.ReductionVars.append(C->varlist_begin(), C->varlist_end());
5363 Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end());
5364 Data.ReductionCopies.append(C->privates().begin(), C->privates().end());
5365 Data.ReductionOps.append(C->reduction_ops().begin(),
5366 C->reduction_ops().end());
5367 LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end());
5368 RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end());
5369 }
5370 llvm::Value *ReductionDesc =
5371 CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(),
5372 LHSExprs: LHSs, RHSExprs: RHSs, Data);
5373 const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl());
5374 CGF.EmitVarDecl(D: *VD);
5375 CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD),
5376 /*Volatile=*/false, Ty: E->getType());
5377 }
5378 CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt());
5379 };
5380 CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc());
5381}
5382
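// Illustrative forms handled below (not from this source):
//   #pragma omp flush        // strong flush, acquire-release ordering
//   #pragma omp flush(a, b)  // flush restricted to the listed variables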
5383void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
5384 llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>()
5385 ? llvm::AtomicOrdering::NotAtomic
5386 : llvm::AtomicOrdering::AcquireRelease;
5387 CGM.getOpenMPRuntime().emitFlush(
5388 CGF&: *this,
5389 Vars: [&S]() -> ArrayRef<const Expr *> {
5390 if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>())
5391 return llvm::ArrayRef(FlushClause->varlist_begin(),
5392 FlushClause->varlist_end());
5393 return std::nullopt;
5394 }(),
5395 Loc: S.getBeginLoc(), AO);
5396}
5397
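// A depobj directive carries exactly one of the depend, destroy, or update
// clauses, e.g. (illustrative only):
//   #pragma omp depobj(o) depend(inout: x)  // initialize 'o'
//   #pragma omp depobj(o) update(in)        // change its dependence kind
//   #pragma omp depobj(o) destroy           // release its storage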
5398void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
5399 const auto *DO = S.getSingleClause<OMPDepobjClause>();
5400 LValue DOLVal = EmitLValue(E: DO->getDepobj());
5401 if (const auto *DC = S.getSingleClause<OMPDependClause>()) {
5402 OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(),
5403 DC->getModifier());
5404 Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end());
5405 Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause(
5406 CGF&: *this, Dependencies, Loc: DC->getBeginLoc());
5407 EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal);
5408 return;
5409 }
5410 if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) {
5411 CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc());
5412 return;
5413 }
5414 if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) {
5415 CGM.getOpenMPRuntime().emitUpdateClause(
5416 CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc());
5417 return;
5418 }
5419}
5420
5421void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
5422 if (!OMPParentLoopDirectiveForScan)
5423 return;
5424 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
5425 bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>();
5426 SmallVector<const Expr *, 4> Shareds;
5427 SmallVector<const Expr *, 4> Privates;
5428 SmallVector<const Expr *, 4> LHSs;
5429 SmallVector<const Expr *, 4> RHSs;
5430 SmallVector<const Expr *, 4> ReductionOps;
5431 SmallVector<const Expr *, 4> CopyOps;
5432 SmallVector<const Expr *, 4> CopyArrayTemps;
5433 SmallVector<const Expr *, 4> CopyArrayElems;
5434 for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) {
5435 if (C->getModifier() != OMPC_REDUCTION_inscan)
5436 continue;
5437 Shareds.append(C->varlist_begin(), C->varlist_end());
5438 Privates.append(in_start: C->privates().begin(), in_end: C->privates().end());
5439 LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end());
5440 RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end());
5441 ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end());
5442 CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end());
5443 CopyArrayTemps.append(in_start: C->copy_array_temps().begin(),
5444 in_end: C->copy_array_temps().end());
5445 CopyArrayElems.append(in_start: C->copy_array_elems().begin(),
5446 in_end: C->copy_array_elems().end());
5447 }
5448 if (ParentDir.getDirectiveKind() == OMPD_simd ||
5449 (getLangOpts().OpenMPSimd &&
5450 isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) {
5451 // For the simd directive and simd-based directives in simd-only mode, use
5452 // the following codegen:
5453 // int x = 0;
5454 // #pragma omp simd reduction(inscan, +: x)
5455 // for (..) {
5456 // <first part>
5457 // #pragma omp scan inclusive(x)
5458 // <second part>
5459 // }
5460 // is transformed to:
5461 // int x = 0;
5462 // for (..) {
5463 // int x_priv = 0;
5464 // <first part>
5465 // x = x_priv + x;
5466 // x_priv = x;
5467 // <second part>
5468 // }
5469 // and
5470 // int x = 0;
5471 // #pragma omp simd reduction(inscan, +: x)
5472 // for (..) {
5473 // <first part>
5474 // #pragma omp scan exclusive(x)
5475 // <second part>
5476 // }
5477 // is transformed to:
5478 // int x = 0;
5479 // for (..) {
5480 // int x_priv = 0;
5481 // <second part>
5482 // int temp = x;
5483 // x = x_priv + x;
5484 // x_priv = temp;
5485 // <first part>
5486 // }
5487 llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce");
5488 EmitBranch(Block: IsInclusive
5489 ? OMPScanReduce
5490 : BreakContinueStack.back().ContinueBlock.getBlock());
5491 EmitBlock(BB: OMPScanDispatch);
5492 {
5493 // New scope for correct construction/destruction of temp variables for
5494 // exclusive scan.
5495 LexicalScope Scope(*this, S.getSourceRange());
5496 EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
5497 EmitBlock(BB: OMPScanReduce);
5498 if (!IsInclusive) {
5499 // Create temp var and copy LHS value to this temp value.
5500 // TMP = LHS;
5501 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5502 const Expr *PrivateExpr = Privates[I];
5503 const Expr *TempExpr = CopyArrayTemps[I];
5504 EmitAutoVarDecl(
5505 D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl()));
5506 LValue DestLVal = EmitLValue(E: TempExpr);
5507 LValue SrcLVal = EmitLValue(E: LHSs[I]);
5508 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this),
5509 SrcAddr: SrcLVal.getAddress(CGF&: *this),
5510 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5511 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5512 Copy: CopyOps[I]);
5513 }
5514 }
5515 CGM.getOpenMPRuntime().emitReduction(
5516 *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps,
5517 {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd});
5518 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5519 const Expr *PrivateExpr = Privates[I];
5520 LValue DestLVal;
5521 LValue SrcLVal;
5522 if (IsInclusive) {
5523 DestLVal = EmitLValue(E: RHSs[I]);
5524 SrcLVal = EmitLValue(E: LHSs[I]);
5525 } else {
5526 const Expr *TempExpr = CopyArrayTemps[I];
5527 DestLVal = EmitLValue(E: RHSs[I]);
5528 SrcLVal = EmitLValue(E: TempExpr);
5529 }
5530 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this),
5531 SrcAddr: SrcLVal.getAddress(CGF&: *this),
5532 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5533 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5534 Copy: CopyOps[I]);
5535 }
5536 }
5537 EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
5538 OMPScanExitBlock = IsInclusive
5539 ? BreakContinueStack.back().ContinueBlock.getBlock()
5540 : OMPScanReduce;
5541 EmitBlock(BB: OMPAfterScanBlock);
5542 return;
5543 }
5544 if (!IsInclusive) {
5545 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5546 EmitBlock(BB: OMPScanExitBlock);
5547 }
5548 if (OMPFirstScanLoop) {
5549 // Emit buffer[i] = red; at the end of the input phase.
5550 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5551 .getIterationVariable()
5552 ->IgnoreParenImpCasts();
5553 LValue IdxLVal = EmitLValue(E: IVExpr);
5554 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5555 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5556 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5557 const Expr *PrivateExpr = Privates[I];
5558 const Expr *OrigExpr = Shareds[I];
5559 const Expr *CopyArrayElem = CopyArrayElems[I];
5560 OpaqueValueMapping IdxMapping(
5561 *this,
5562 cast<OpaqueValueExpr>(
5563 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5564 RValue::get(V: IdxVal));
5565 LValue DestLVal = EmitLValue(E: CopyArrayElem);
5566 LValue SrcLVal = EmitLValue(E: OrigExpr);
5567 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this),
5568 SrcAddr: SrcLVal.getAddress(CGF&: *this),
5569 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5570 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5571 Copy: CopyOps[I]);
5572 }
5573 }
5574 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5575 if (IsInclusive) {
5576 EmitBlock(BB: OMPScanExitBlock);
5577 EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock());
5578 }
5579 EmitBlock(BB: OMPScanDispatch);
5580 if (!OMPFirstScanLoop) {
5581 // Emit red = buffer[i]; at the entrance to the scan phase.
5582 const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir)
5583 .getIterationVariable()
5584 ->IgnoreParenImpCasts();
5585 LValue IdxLVal = EmitLValue(E: IVExpr);
5586 llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc());
5587 IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false);
5588 llvm::BasicBlock *ExclusiveExitBB = nullptr;
5589 if (!IsInclusive) {
5590 llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec");
5591 ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit");
5592 llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal);
5593 Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB);
5594 EmitBlock(BB: ContBB);
5595 // Use idx - 1 iteration for exclusive scan.
5596 IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1));
5597 }
5598 for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) {
5599 const Expr *PrivateExpr = Privates[I];
5600 const Expr *OrigExpr = Shareds[I];
5601 const Expr *CopyArrayElem = CopyArrayElems[I];
5602 OpaqueValueMapping IdxMapping(
5603 *this,
5604 cast<OpaqueValueExpr>(
5605 Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()),
5606 RValue::get(V: IdxVal));
5607 LValue SrcLVal = EmitLValue(E: CopyArrayElem);
5608 LValue DestLVal = EmitLValue(E: OrigExpr);
5609 EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this),
5610 SrcAddr: SrcLVal.getAddress(CGF&: *this),
5611 DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()),
5612 SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()),
5613 Copy: CopyOps[I]);
5614 }
5615 if (!IsInclusive) {
5616 EmitBlock(BB: ExclusiveExitBB);
5617 }
5618 }
5619 EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock
5620 : OMPAfterScanBlock);
5621 EmitBlock(BB: OMPAfterScanBlock);
5622}
5623
5624void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
5625 const CodeGenLoopTy &CodeGenLoop,
5626 Expr *IncExpr) {
5627 // Emit the loop iteration variable.
5628 const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable());
5629 const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl());
5630 EmitVarDecl(D: *IVDecl);
5631
5632 // Emit the iterations count variable.
5633 // If it is not stored in a variable, Sema arranged for the iteration count
5634 // to be recalculated on each use (e.g., it is foldable into a constant).
5635 if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) {
5636 EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl()));
5637 // Emit calculation of the iterations count.
5638 EmitIgnoredExpr(E: S.getCalcLastIteration());
5639 }
5640
5641 CGOpenMPRuntime &RT = CGM.getOpenMPRuntime();
5642
5643 bool HasLastprivateClause = false;
5644 // Check pre-condition.
5645 {
5646 OMPLoopScope PreInitScope(*this, S);
5647 // Skip the entire loop if we don't meet the precondition.
5648 // If the condition constant folds and can be elided, avoid emitting the
5649 // whole loop.
5650 bool CondConstant;
5651 llvm::BasicBlock *ContBlock = nullptr;
5652 if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) {
5653 if (!CondConstant)
5654 return;
5655 } else {
5656 llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then");
5657 ContBlock = createBasicBlock(name: "omp.precond.end");
5658 emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock,
5659 TrueCount: getProfileCount(&S));
5660 EmitBlock(BB: ThenBlock);
5661 incrementProfileCounter(&S);
5662 }
5663
5664 emitAlignedClause(*this, S);
5665 // Emit 'then' code.
5666 {
5667 // Emit helper vars inits.
5668
5669 LValue LB = EmitOMPHelperVar(
5670 *this, cast<DeclRefExpr>(
5671 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5672 ? S.getCombinedLowerBoundVariable()
5673 : S.getLowerBoundVariable())));
5674 LValue UB = EmitOMPHelperVar(
5675 *this, cast<DeclRefExpr>(
5676 (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5677 ? S.getCombinedUpperBoundVariable()
5678 : S.getUpperBoundVariable())));
5679 LValue ST =
5680 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()));
5681 LValue IL =
5682 EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()));
5683
5684 OMPPrivateScope LoopScope(*this);
5685 if (EmitOMPFirstprivateClause(S, LoopScope)) {
5686 // Emit implicit barrier to synchronize threads and avoid data races
5687 // on initialization of firstprivate variables and post-update of
5688 // lastprivate variables.
5689 CGM.getOpenMPRuntime().emitBarrierCall(
5690 *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false,
5691 /*ForceSimpleCall=*/true);
5692 }
5693 EmitOMPPrivateClause(S, LoopScope);
5694 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5695 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5696 !isOpenMPTeamsDirective(S.getDirectiveKind()))
5697 EmitOMPReductionClauseInit(S, LoopScope);
5698 HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope);
5699 EmitOMPPrivateLoopCounters(S, LoopScope);
5700 (void)LoopScope.Privatize();
5701 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
5702 CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S);
5703
5704 // Detect the distribute schedule kind and chunk.
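// For example (illustrative only), dist_schedule(static, 64) yields
// ScheduleKind == OMPC_DIST_SCHEDULE_static with Chunk holding the value 64
// converted to the iteration variable's type.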
5705 llvm::Value *Chunk = nullptr;
5706 OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown;
5707 if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) {
5708 ScheduleKind = C->getDistScheduleKind();
5709 if (const Expr *Ch = C->getChunkSize()) {
5710 Chunk = EmitScalarExpr(E: Ch);
5711 Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(),
5712 DstTy: S.getIterationVariable()->getType(),
5713 Loc: S.getBeginLoc());
5714 }
5715 } else {
5716 // Default behaviour when no dist_schedule clause is specified.
5717 CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk(
5718 CGF&: *this, S, ScheduleKind, Chunk);
5719 }
5720 const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
5721 const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
5722
5723 // OpenMP [2.10.8, distribute Construct, Description]
5724 // If dist_schedule is specified, kind must be static. If specified,
5725 // iterations are divided into chunks of size chunk_size, chunks are
5726 // assigned to the teams of the league in a round-robin fashion in the
5727 // order of the team number. When no chunk_size is specified, the
5728 // iteration space is divided into chunks that are approximately equal
5729 // in size, and at most one chunk is distributed to each team of the
5730 // league. The size of the chunks is unspecified in this case.
5731 bool StaticChunked =
5732 RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) &&
5733 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind());
5734 if (RT.isStaticNonchunked(ScheduleKind,
5735 /* Chunked */ Chunk != nullptr) ||
5736 StaticChunked) {
5737 CGOpenMPRuntime::StaticRTInput StaticInit(
5738 IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(CGF&: *this),
5739 LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this),
5740 StaticChunked ? Chunk : nullptr);
5741 RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind,
5742 Values: StaticInit);
5743 JumpDest LoopExit =
5744 getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit"));
5745 // UB = min(UB, GlobalUB);
5746 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5747 ? S.getCombinedEnsureUpperBound()
5748 : S.getEnsureUpperBound());
5749 // IV = LB;
5750 EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5751 ? S.getCombinedInit()
5752 : S.getInit());
5753
5754 const Expr *Cond =
5755 isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
5756 ? S.getCombinedCond()
5757 : S.getCond();
5758
5759 if (StaticChunked)
5760 Cond = S.getCombinedDistCond();
5761
5762 // For static unchunked schedules, generate:
5763 //
5764 // 1. For distribute alone, codegen
5765 // while (idx <= UB) {
5766 // BODY;
5767 // ++idx;
5768 // }
5769 //
5770 // 2. When combined with 'for' (e.g. as in 'distribute parallel for')
5771 // while (idx <= UB) {
5772 // <CodeGen rest of pragma>(LB, UB);
5773 // idx += ST;
5774 // }
5775 //
5776 // For static chunked schedules, generate:
5777 //
5778 // while (IV <= GlobalUB) {
5779 // <CodeGen rest of pragma>(LB, UB);
5780 // LB += ST;
5781 // UB += ST;
5782 // UB = min(UB, GlobalUB);
5783 // IV = LB;
5784 // }
5785 //
5786 emitCommonSimdLoop(
5787 CGF&: *this, S,
5788 SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5789 if (isOpenMPSimdDirective(S.getDirectiveKind()))
5790 CGF.EmitOMPSimdInit(D: S);
5791 },
5792 BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop,
5793 StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) {
5794 CGF.EmitOMPInnerLoop(
5795 S, LoopScope.requiresCleanups(), Cond, IncExpr,
5796 [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
5797 CodeGenLoop(CGF, S, LoopExit);
5798 },
5799 [&S, StaticChunked](CodeGenFunction &CGF) {
5800 if (StaticChunked) {
5801 CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound());
5802 CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound());
5803 CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound());
5804 CGF.EmitIgnoredExpr(E: S.getCombinedInit());
5805 }
5806 });
5807 });
5808 EmitBlock(BB: LoopExit.getBlock());
5809 // Tell the runtime we are done.
5810 RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute);
5811 } else {
5812 // Emit the outer loop, which requests its work chunk [LB..UB] from the
5813 // runtime and runs the inner loop to process it.
5814 const OMPLoopArguments LoopArguments = {
5815 LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this),
5816 IL.getAddress(CGF&: *this), Chunk};
5817 EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments,
5818 CodeGenLoopContent: CodeGenLoop);
5819 }
5820 if (isOpenMPSimdDirective(S.getDirectiveKind())) {
5821 EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) {
5822 return CGF.Builder.CreateIsNotNull(
5823 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5824 });
5825 }
5826 if (isOpenMPSimdDirective(S.getDirectiveKind()) &&
5827 !isOpenMPParallelDirective(S.getDirectiveKind()) &&
5828 !isOpenMPTeamsDirective(S.getDirectiveKind())) {
5829 EmitOMPReductionClauseFinal(S, OMPD_simd);
5830 // Emit post-update of the reduction variables if IsLastIter != 0.
5831 emitPostUpdateForReductionClause(
5832 *this, S, [IL, &S](CodeGenFunction &CGF) {
5833 return CGF.Builder.CreateIsNotNull(
5834 CGF.EmitLoadOfScalar(IL, S.getBeginLoc()));
5835 });
5836 }
5837 // Emit final copy of the lastprivate variables if IsLastIter != 0.
5838 if (HasLastprivateClause) {
5839 EmitOMPLastprivateClauseFinal(
5840 D: S, /*NoFinals=*/false,
5841 IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(IL, S.getBeginLoc())));
5842 }
5843 }
5844
5845 // We're now done with the loop, so jump to the continuation block.
5846 if (ContBlock) {
5847 EmitBranch(Block: ContBlock);
5848 EmitBlock(BB: ContBlock, IsFinished: true);
5849 }
5850 }
5851}
5852
5853void CodeGenFunction::EmitOMPDistributeDirective(
5854 const OMPDistributeDirective &S) {
5855 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
5856 CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc());
5857 };
5858 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5859 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen);
5860}
5861
5862static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM,
5863 const CapturedStmt *S,
5864 SourceLocation Loc) {
5865 CodeGenFunction CGF(CGM, /*suppressNewContext=*/true);
5866 CodeGenFunction::CGCapturedStmtInfo CapStmtInfo;
5867 CGF.CapturedStmtInfo = &CapStmtInfo;
5868 llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc);
5869 Fn->setDoesNotRecurse();
5870 return Fn;
5871}
5872
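// Helper for ordered depend(sink/source) and doacross clauses: every loop
// iteration value is converted to i64 and handed to the OpenMPIRBuilder,
// which stores the vector and emits the matching doacross runtime call.
// Illustrative example (not from this source): 'depend(sink: i-1, j)'
// produces two stores into the counter buffer before the wait call.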
5873template <typename T>
5874static void emitRestoreIP(CodeGenFunction &CGF, const T *C,
5875 llvm::OpenMPIRBuilder::InsertPointTy AllocaIP,
5876 llvm::OpenMPIRBuilder &OMPBuilder) {
5877
5878 unsigned NumLoops = C->getNumLoops();
5879 QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth(
5880 /*DestWidth=*/64, /*Signed=*/1);
5881 llvm::SmallVector<llvm::Value *> StoreValues;
5882 for (unsigned I = 0; I < NumLoops; I++) {
5883 const Expr *CounterVal = C->getLoopData(I);
5884 assert(CounterVal);
5885 llvm::Value *StoreValue = CGF.EmitScalarConversion(
5886 Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty,
5887 Loc: CounterVal->getExprLoc());
5888 StoreValues.emplace_back(Args&: StoreValue);
5889 }
5890 OMPDoacrossKind<T> ODK;
5891 bool IsDependSource = ODK.isSource(C);
5892 CGF.Builder.restoreIP(
5893 IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops,
5894 StoreValues, Name: ".cnt.addr", IsDependSource));
5895}
5896
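// Illustrative doacross usage (not from this source):
//   #pragma omp for ordered(2)
//   for (int i = 0; i < n; ++i)
//     for (int j = 0; j < m; ++j) {
//       #pragma omp ordered depend(sink: i-1, j)
//       use(a[i-1][j]);
//       #pragma omp ordered depend(source)
//     }
// The depend/doacross forms below lower to doacross runtime calls, while the
// block-associated forms emit an ordered region.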
5897void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) {
5898 if (CGM.getLangOpts().OpenMPIRBuilder) {
5899 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
5900 using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
5901
5902 if (S.hasClausesOfKind<OMPDependClause>() ||
5903 S.hasClausesOfKind<OMPDoacrossClause>()) {
5904 // The ordered directive with a depend or doacross clause.
5905 assert(!S.hasAssociatedStmt() && "An 'ordered' construct with a 'depend' "
5906 "or 'doacross' clause must not have an associated statement.");
5907 InsertPointTy AllocaIP(AllocaInsertPt->getParent(),
5908 AllocaInsertPt->getIterator());
5909 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5910 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5911 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5912 emitRestoreIP(*this, DC, AllocaIP, OMPBuilder);
5913 } else {
5914 // The ordered directive with a threads or simd clause, or with no clause;
5915 // without a clause, it behaves as if the threads clause were specified.
5916 const auto *C = S.getSingleClause<OMPSIMDClause>();
5917
5918 auto FiniCB = [this](InsertPointTy IP) {
5919 OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP);
5920 };
5921
5922 auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP,
5923 InsertPointTy CodeGenIP) {
5924 Builder.restoreIP(IP: CodeGenIP);
5925
5926 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5927 if (C) {
5928 llvm::BasicBlock *FiniBB = splitBBWithSuffix(
5929 Builder, /*CreateBranch=*/false, Suffix: ".ordered.after");
5930 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5931 GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
5932 llvm::Function *OutlinedFn =
5933 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5934 assert(S.getBeginLoc().isValid() &&
5935 "Outlined function call location must be valid.");
5936 ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc());
5937 OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB,
5938 Fn: OutlinedFn, Args: CapturedVars);
5939 } else {
5940 OMPBuilderCBHelpers::EmitOMPInlinedRegionBody(
5941 CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered");
5942 }
5943 };
5944
5945 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5946 Builder.restoreIP(
5947 IP: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C));
5948 }
5949 return;
5950 }
5951
5952 if (S.hasClausesOfKind<OMPDependClause>()) {
5953 assert(!S.hasAssociatedStmt() &&
5954 "An 'ordered depend' construct must not have an associated statement.");
5955 for (const auto *DC : S.getClausesOfKind<OMPDependClause>())
5956 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5957 return;
5958 }
5959 if (S.hasClausesOfKind<OMPDoacrossClause>()) {
5960 assert(!S.hasAssociatedStmt() &&
5961 "An 'ordered doacross' construct must not have an associated statement.");
5962 for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>())
5963 CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC);
5964 return;
5965 }
5966 const auto *C = S.getSingleClause<OMPSIMDClause>();
5967 auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF,
5968 PrePostActionTy &Action) {
5969 const CapturedStmt *CS = S.getInnermostCapturedStmt();
5970 if (C) {
5971 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
5972 CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars);
5973 llvm::Function *OutlinedFn =
5974 emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc());
5975 CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(),
5976 OutlinedFn, Args: CapturedVars);
5977 } else {
5978 Action.Enter(CGF);
5979 CGF.EmitStmt(S: CS->getCapturedStmt());
5980 }
5981 };
5982 OMPLexicalScope Scope(*this, S, OMPD_unknown);
5983 CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C);
5984}
5985
5986static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
5987 QualType SrcType, QualType DestType,
5988 SourceLocation Loc) {
5989 assert(CGF.hasScalarEvaluationKind(DestType) &&
5990 "DestType must have scalar evaluation kind.");
5991 assert(!Val.isAggregate() && "Must be a scalar or complex.");
5992 return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType,
5993 DstTy: DestType, Loc)
5994 : CGF.EmitComplexToScalarConversion(
5995 Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc);
5996}
5997
5998static CodeGenFunction::ComplexPairTy
5999convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
6000 QualType DestType, SourceLocation Loc) {
6001 assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
6002 "DestType must have complex evaluation kind.");
6003 CodeGenFunction::ComplexPairTy ComplexVal;
6004 if (Val.isScalar()) {
6005 // Convert the input element to the element type of the complex.
6006 QualType DestElementType =
6007 DestType->castAs<ComplexType>()->getElementType();
6008 llvm::Value *ScalarVal = CGF.EmitScalarConversion(
6009 Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc);
6010 ComplexVal = CodeGenFunction::ComplexPairTy(
6011 ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType()));
6012 } else {
6013 assert(Val.isComplex() && "Must be a scalar or complex.");
6014 QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
6015 QualType DestElementType =
6016 DestType->castAs<ComplexType>()->getElementType();
6017 ComplexVal.first = CGF.EmitScalarConversion(
6018 Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6019 ComplexVal.second = CGF.EmitScalarConversion(
6020 Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc);
6021 }
6022 return ComplexVal;
6023}
6024
6025static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6026 LValue LVal, RValue RVal) {
6027 if (LVal.isGlobalReg())
6028 CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal);
6029 else
6030 CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false);
6031}
6032
6033static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF,
6034 llvm::AtomicOrdering AO, LValue LVal,
6035 SourceLocation Loc) {
6036 if (LVal.isGlobalReg())
6037 return CGF.EmitLoadOfLValue(V: LVal, Loc);
6038 return CGF.EmitAtomicLoad(
6039 lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO),
6040 IsVolatile: LVal.isVolatile());
6041}
6042
6043void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal,
6044 QualType RValTy, SourceLocation Loc) {
6045 switch (getEvaluationKind(T: LVal.getType())) {
6046 case TEK_Scalar:
6047 EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue(
6048 CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)),
6049 Dst: LVal);
6050 break;
6051 case TEK_Complex:
6052 EmitStoreOfComplex(
6053 V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal,
6054 /*isInit=*/false);
6055 break;
6056 case TEK_Aggregate:
6057 llvm_unreachable("Must be a scalar or complex.");
6058 }
6059}
6060
6061static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO,
6062 const Expr *X, const Expr *V,
6063 SourceLocation Loc) {
6064 // v = x;
6065 assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
6066 assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
6067 LValue XLValue = CGF.EmitLValue(E: X);
6068 LValue VLValue = CGF.EmitLValue(E: V);
6069 RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc);
6070 // OpenMP, 2.17.7, atomic Construct
6071 // If the read or capture clause is specified and the acquire, acq_rel, or
6072 // seq_cst clause is specified then the strong flush on exit from the atomic
6073 // operation is also an acquire flush.
6074 switch (AO) {
6075 case llvm::AtomicOrdering::Acquire:
6076 case llvm::AtomicOrdering::AcquireRelease:
6077 case llvm::AtomicOrdering::SequentiallyConsistent:
6078 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6079 AO: llvm::AtomicOrdering::Acquire);
6080 break;
6081 case llvm::AtomicOrdering::Monotonic:
6082 case llvm::AtomicOrdering::Release:
6083 break;
6084 case llvm::AtomicOrdering::NotAtomic:
6085 case llvm::AtomicOrdering::Unordered:
6086 llvm_unreachable("Unexpected ordering.");
6087 }
6088 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc);
6089 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6090}
6091
6092static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF,
6093 llvm::AtomicOrdering AO, const Expr *X,
6094 const Expr *E, SourceLocation Loc) {
6095 // x = expr;
6096 assert(X->isLValue() && "X of 'omp atomic write' is not lvalue");
6097 emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E));
6098 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6099 // OpenMP, 2.17.7, atomic Construct
6100 // If the write, update, or capture clause is specified and the release,
6101 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6102 // the atomic operation is also a release flush.
6103 switch (AO) {
6104 case llvm::AtomicOrdering::Release:
6105 case llvm::AtomicOrdering::AcquireRelease:
6106 case llvm::AtomicOrdering::SequentiallyConsistent:
6107 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6108 AO: llvm::AtomicOrdering::Release);
6109 break;
6110 case llvm::AtomicOrdering::Acquire:
6111 case llvm::AtomicOrdering::Monotonic:
6112 break;
6113 case llvm::AtomicOrdering::NotAtomic:
6114 case llvm::AtomicOrdering::Unordered:
6115 llvm_unreachable("Unexpected ordering.");
6116 }
6117}
6118
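// Where the operand types and the target allow it, an update such as
// (illustrative only)
//   #pragma omp atomic update
//   x += 1;
// maps to a single 'atomicrmw add' instruction. The helper below reports
// failure for forms it cannot express (e.g. 'x = expr - x' or 'x *= expr')
// so the caller can fall back to a compare-and-swap update loop.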
6119static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
6120 RValue Update,
6121 BinaryOperatorKind BO,
6122 llvm::AtomicOrdering AO,
6123 bool IsXLHSInRHSPart) {
6124 ASTContext &Context = CGF.getContext();
6125 // Allow atomicrmw only if 'x' and 'update' are integer values, the lvalue
6126 // for the 'x' expression is simple, and atomics are supported for the given
6127 // type on the target platform.
6128 if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() ||
6129 (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) &&
6130 (Update.getScalarVal()->getType() !=
6131 X.getAddress(CGF).getElementType())) ||
6132 !Context.getTargetInfo().hasBuiltinAtomic(
6133 AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment())))
6134 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6135
6136 auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) {
6137 if (T->isIntegerTy())
6138 return true;
6139
6140 if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub))
6141 return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T));
6142
6143 return false;
6144 };
6145
6146 if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) ||
6147 !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO))
6148 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6149
6150 bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy();
6151 llvm::AtomicRMWInst::BinOp RMWOp;
6152 switch (BO) {
6153 case BO_Add:
6154 RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd;
6155 break;
6156 case BO_Sub:
6157 if (!IsXLHSInRHSPart)
6158 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6159 RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub;
6160 break;
6161 case BO_And:
6162 RMWOp = llvm::AtomicRMWInst::And;
6163 break;
6164 case BO_Or:
6165 RMWOp = llvm::AtomicRMWInst::Or;
6166 break;
6167 case BO_Xor:
6168 RMWOp = llvm::AtomicRMWInst::Xor;
6169 break;
6170 case BO_LT:
6171 if (IsInteger)
6172 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6173 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min
6174 : llvm::AtomicRMWInst::Max)
6175 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin
6176 : llvm::AtomicRMWInst::UMax);
6177 else
6178 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin
6179 : llvm::AtomicRMWInst::FMax;
6180 break;
6181 case BO_GT:
6182 if (IsInteger)
6183 RMWOp = X.getType()->hasSignedIntegerRepresentation()
6184 ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max
6185 : llvm::AtomicRMWInst::Min)
6186 : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax
6187 : llvm::AtomicRMWInst::UMin);
6188 else
6189 RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax
6190 : llvm::AtomicRMWInst::FMin;
6191 break;
6192 case BO_Assign:
6193 RMWOp = llvm::AtomicRMWInst::Xchg;
6194 break;
6195 case BO_Mul:
6196 case BO_Div:
6197 case BO_Rem:
6198 case BO_Shl:
6199 case BO_Shr:
6200 case BO_LAnd:
6201 case BO_LOr:
6202 return std::make_pair(x: false, y: RValue::get(V: nullptr));
6203 case BO_PtrMemD:
6204 case BO_PtrMemI:
6205 case BO_LE:
6206 case BO_GE:
6207 case BO_EQ:
6208 case BO_NE:
6209 case BO_Cmp:
6210 case BO_AddAssign:
6211 case BO_SubAssign:
6212 case BO_AndAssign:
6213 case BO_OrAssign:
6214 case BO_XorAssign:
6215 case BO_MulAssign:
6216 case BO_DivAssign:
6217 case BO_RemAssign:
6218 case BO_ShlAssign:
6219 case BO_ShrAssign:
6220 case BO_Comma:
6221 llvm_unreachable("Unsupported atomic update operation");
6222 }
6223 llvm::Value *UpdateVal = Update.getScalarVal();
6224 if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) {
6225 if (IsInteger)
6226 UpdateVal = CGF.Builder.CreateIntCast(
6227 V: IC, DestTy: X.getAddress(CGF).getElementType(),
6228 isSigned: X.getType()->hasSignedIntegerRepresentation());
6229 else
6230 UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC,
6231 DestTy: X.getAddress(CGF).getElementType());
6232 }
6233 llvm::Value *Res =
6234 CGF.Builder.CreateAtomicRMW(Op: RMWOp, Addr: X.getAddress(CGF), Val: UpdateVal, Ordering: AO);
6235 return std::make_pair(x: true, y: RValue::get(V: Res));
6236}
6237
6238std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr(
6239 LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart,
6240 llvm::AtomicOrdering AO, SourceLocation Loc,
6241 const llvm::function_ref<RValue(RValue)> CommonGen) {
6242 // Update expressions are allowed to have the following forms:
6243 // x binop= expr; -> xrval binop expr;
6244 // x++, ++x -> xrval + 1;
6245 // x--, --x -> xrval - 1;
6246 // x = x binop expr; -> xrval binop expr;
6247 // x = expr Op x; -> expr binop xrval;
6248 auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart);
6249 if (!Res.first) {
6250 if (X.isGlobalReg()) {
6251 // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop
6252 // 'xrval'.
6253 EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X);
6254 } else {
6255 // Perform compare-and-swap procedure.
6256 EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified());
6257 }
6258 }
6259 return Res;
6260}
6261
6262static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF,
6263 llvm::AtomicOrdering AO, const Expr *X,
6264 const Expr *E, const Expr *UE,
6265 bool IsXLHSInRHSPart, SourceLocation Loc) {
6266 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6267 "Update expr in 'atomic update' must be a binary operator.");
6268 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6269 // Update expressions are allowed to have the following forms:
6270 // x binop= expr; -> xrval binop expr;
6271 // x++, ++x -> xrval + 1;
6272 // x--, --x -> xrval - 1;
6273 // x = x binop expr; -> xrval binop expr;
6274 // x = expr Op x; -> expr binop xrval;
6275 assert(X->isLValue() && "X of 'omp atomic update' is not lvalue");
6276 LValue XLValue = CGF.EmitLValue(E: X);
6277 RValue ExprRValue = CGF.EmitAnyExpr(E);
6278 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6279 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6280 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6281 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6282 auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) {
6283 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6284 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6285 return CGF.EmitAnyExpr(E: UE);
6286 };
6287 (void)CGF.EmitOMPAtomicSimpleUpdateExpr(
6288 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6289 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6290 // OpenMP, 2.17.7, atomic Construct
6291 // If the write, update, or capture clause is specified and the release,
6292 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6293 // the atomic operation is also a release flush.
6294 switch (AO) {
6295 case llvm::AtomicOrdering::Release:
6296 case llvm::AtomicOrdering::AcquireRelease:
6297 case llvm::AtomicOrdering::SequentiallyConsistent:
6298 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6299 AO: llvm::AtomicOrdering::Release);
6300 break;
6301 case llvm::AtomicOrdering::Acquire:
6302 case llvm::AtomicOrdering::Monotonic:
6303 break;
6304 case llvm::AtomicOrdering::NotAtomic:
6305 case llvm::AtomicOrdering::Unordered:
6306 llvm_unreachable("Unexpected ordering.");
6307 }
6308}
6309
6310static RValue convertToType(CodeGenFunction &CGF, RValue Value,
6311 QualType SourceType, QualType ResType,
6312 SourceLocation Loc) {
6313 switch (CGF.getEvaluationKind(T: ResType)) {
6314 case TEK_Scalar:
6315 return RValue::get(
6316 V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc));
6317 case TEK_Complex: {
6318 auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc);
6319 return RValue::getComplex(V1: Res.first, V2: Res.second);
6320 }
6321 case TEK_Aggregate:
6322 break;
6323 }
6324 llvm_unreachable("Must be a scalar or complex.");
6325}
6326
6327static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF,
6328 llvm::AtomicOrdering AO,
6329 bool IsPostfixUpdate, const Expr *V,
6330 const Expr *X, const Expr *E,
6331 const Expr *UE, bool IsXLHSInRHSPart,
6332 SourceLocation Loc) {
6333 assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue");
6334 assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue");
6335 RValue NewVVal;
6336 LValue VLValue = CGF.EmitLValue(E: V);
6337 LValue XLValue = CGF.EmitLValue(E: X);
6338 RValue ExprRValue = CGF.EmitAnyExpr(E);
6339 QualType NewVValType;
6340 if (UE) {
6341 // 'x' is updated with some additional value.
6342 assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) &&
6343 "Update expr in 'atomic capture' must be a binary operator.");
6344 const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts());
6345 // Update expressions are allowed to have the following forms:
6346 // x binop= expr; -> xrval binop expr;
6347 // x++, ++x -> xrval + 1;
6348 // x--, --x -> xrval - 1;
6349 // x = x binop expr; -> xrval binop expr;
6350 // x = expr Op x; -> expr binop xrval;
6351 const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts());
6352 const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts());
6353 const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS;
6354 NewVValType = XRValExpr->getType();
6355 const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS;
6356 auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr,
6357 IsPostfixUpdate](RValue XRValue) {
6358 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6359 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue);
6360 RValue Res = CGF.EmitAnyExpr(E: UE);
6361 NewVVal = IsPostfixUpdate ? XRValue : Res;
6362 return Res;
6363 };
6364 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6365 X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen);
6366 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6367 if (Res.first) {
6368 // 'atomicrmw' instruction was generated.
6369 if (IsPostfixUpdate) {
6370 // Use old value from 'atomicrmw'.
6371 NewVVal = Res.second;
6372 } else {
6373 // 'atomicrmw' does not provide new value, so evaluate it using old
6374 // value of 'x'.
6375 CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue);
6376 CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second);
6377 NewVVal = CGF.EmitAnyExpr(E: UE);
6378 }
6379 }
6380 } else {
6381 // 'x' is simply rewritten with some 'expr'.
6382 NewVValType = X->getType().getNonReferenceType();
6383 ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(),
6384 ResType: X->getType().getNonReferenceType(), Loc);
6385 auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) {
6386 NewVVal = XRValue;
6387 return ExprRValue;
6388 };
6389 // Try to perform atomicrmw xchg, otherwise simple exchange.
6390 auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr(
6391 X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO,
6392 Loc, CommonGen: Gen);
6393 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X);
6394 if (Res.first) {
6395 // 'atomicrmw' instruction was generated.
6396 NewVVal = IsPostfixUpdate ? Res.second : ExprRValue;
6397 }
6398 }
6399 // Emit post-update store to 'v' of old/new 'x' value.
6400 CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc);
6401 CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V);
6402 // OpenMP 5.1 removed the flush requirement for the capture clause.
6403 if (CGF.CGM.getLangOpts().OpenMP < 51) {
6404 // OpenMP, 2.17.7, atomic Construct
6405 // If the write, update, or capture clause is specified and the release,
6406 // acq_rel, or seq_cst clause is specified then the strong flush on entry to
6407 // the atomic operation is also a release flush.
6408 // If the read or capture clause is specified and the acquire, acq_rel, or
6409 // seq_cst clause is specified then the strong flush on exit from the atomic
6410 // operation is also an acquire flush.
6411 switch (AO) {
6412 case llvm::AtomicOrdering::Release:
6413 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6414 AO: llvm::AtomicOrdering::Release);
6415 break;
6416 case llvm::AtomicOrdering::Acquire:
6417 CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc,
6418 AO: llvm::AtomicOrdering::Acquire);
6419 break;
6420 case llvm::AtomicOrdering::AcquireRelease:
6421 case llvm::AtomicOrdering::SequentiallyConsistent:
6422 CGF.CGM.getOpenMPRuntime().emitFlush(
6423 CGF, Vars: std::nullopt, Loc, AO: llvm::AtomicOrdering::AcquireRelease);
6424 break;
6425 case llvm::AtomicOrdering::Monotonic:
6426 break;
6427 case llvm::AtomicOrdering::NotAtomic:
6428 case llvm::AtomicOrdering::Unordered:
6429 llvm_unreachable("Unexpected ordering.");
6430 }
6431 }
6432}
6433
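// The condition expression of an atomic compare selects the operation, e.g.
// (illustrative only):
//   x = x == e ? d : x;  // conditional exchange, Op == EQ
//   x = x < e ? e : x;   // min/max form, Op == MIN below
// The exact min/max lowering additionally depends on IsXBinopExpr and on
// signedness, which the OpenMPIRBuilder resolves.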
6434static void emitOMPAtomicCompareExpr(
6435 CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO,
6436 const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D,
6437 const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly,
6438 SourceLocation Loc) {
6439 llvm::OpenMPIRBuilder &OMPBuilder =
6440 CGF.CGM.getOpenMPRuntime().getOMPBuilder();
6441
6442 OMPAtomicCompareOp Op;
6443 assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator");
6444 switch (cast<BinaryOperator>(Val: CE)->getOpcode()) {
6445 case BO_EQ:
6446 Op = OMPAtomicCompareOp::EQ;
6447 break;
6448 case BO_LT:
6449 Op = OMPAtomicCompareOp::MIN;
6450 break;
6451 case BO_GT:
6452 Op = OMPAtomicCompareOp::MAX;
6453 break;
6454 default:
6455 llvm_unreachable("unsupported atomic compare binary operator");
6456 }
6457
6458 LValue XLVal = CGF.EmitLValue(E: X);
6459 Address XAddr = XLVal.getAddress(CGF);
6460
6461 auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) {
6462 if (X->getType() == E->getType())
6463 return CGF.EmitScalarExpr(E);
6464 const Expr *NewE = E->IgnoreImplicitAsWritten();
6465 llvm::Value *V = CGF.EmitScalarExpr(E: NewE);
6466 if (NewE->getType() == X->getType())
6467 return V;
6468 return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc);
6469 };
6470
6471 llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E);
6472 llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr;
6473 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal))
6474 EVal = CGF.Builder.CreateIntCast(
6475 V: CI, DestTy: XLVal.getAddress(CGF).getElementType(),
6476 isSigned: E->getType()->hasSignedIntegerRepresentation());
6477 if (DVal)
6478 if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal))
6479 DVal = CGF.Builder.CreateIntCast(
6480 V: CI, DestTy: XLVal.getAddress(CGF).getElementType(),
6481 isSigned: D->getType()->hasSignedIntegerRepresentation());
6482
6483 llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{
6484 .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(),
6485 .IsSigned: X->getType()->hasSignedIntegerRepresentation(),
6486 .IsVolatile: X->getType().isVolatileQualified()};
6487 llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal;
6488 if (V) {
6489 LValue LV = CGF.EmitLValue(E: V);
6490 Address Addr = LV.getAddress(CGF);
6491 VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6492 .IsSigned: V->getType()->hasSignedIntegerRepresentation(),
6493 .IsVolatile: V->getType().isVolatileQualified()};
6494 }
6495 if (R) {
6496 LValue LV = CGF.EmitLValue(E: R);
6497 Address Addr = LV.getAddress(CGF);
6498 ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(),
6499 .IsSigned: R->getType()->hasSignedIntegerRepresentation(),
6500 .IsVolatile: R->getType().isVolatileQualified()};
6501 }
6502
6503 if (FailAO == llvm::AtomicOrdering::NotAtomic) {
6504 // No 'fail' clause was specified on the
6505 // "#pragma omp atomic compare" construct.
6506 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6507 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6508 IsPostfixUpdate, IsFailOnly));
6509 } else
6510 CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare(
6511 Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr,
6512 IsPostfixUpdate, IsFailOnly, Failure: FailAO));
6513}
6514
6515static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
6516 llvm::AtomicOrdering AO,
6517 llvm::AtomicOrdering FailAO, bool IsPostfixUpdate,
6518 const Expr *X, const Expr *V, const Expr *R,
6519 const Expr *E, const Expr *UE, const Expr *D,
6520 const Expr *CE, bool IsXLHSInRHSPart,
6521 bool IsFailOnly, SourceLocation Loc) {
6522 switch (Kind) {
6523 case OMPC_read:
6524 emitOMPAtomicReadExpr(CGF, AO, X, V, Loc);
6525 break;
6526 case OMPC_write:
6527 emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc);
6528 break;
6529 case OMPC_unknown:
6530 case OMPC_update:
6531 emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc);
6532 break;
6533 case OMPC_capture:
6534 emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE,
6535 IsXLHSInRHSPart, Loc);
6536 break;
6537 case OMPC_compare: {
6538 emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE,
6539 IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc);
6540 break;
6541 }
6542 default:
6543 llvm_unreachable("Clause is not allowed in 'omp atomic'.");
6544 }
6545}
6546
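// For example (illustrative only), the directive
//   #pragma omp atomic compare capture seq_cst fail(acquire)
// results in Kind == OMPC_compare, AO == SequentiallyConsistent, and
// FailAO == Acquire after the clause scan below.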
6547void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
6548 llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic;
6549 // Memory ordering for the 'fail' clause, if present.
6550 llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic;
6551 bool MemOrderingSpecified = false;
6552 if (S.getSingleClause<OMPSeqCstClause>()) {
6553 AO = llvm::AtomicOrdering::SequentiallyConsistent;
6554 MemOrderingSpecified = true;
6555 } else if (S.getSingleClause<OMPAcqRelClause>()) {
6556 AO = llvm::AtomicOrdering::AcquireRelease;
6557 MemOrderingSpecified = true;
6558 } else if (S.getSingleClause<OMPAcquireClause>()) {
6559 AO = llvm::AtomicOrdering::Acquire;
6560 MemOrderingSpecified = true;
6561 } else if (S.getSingleClause<OMPReleaseClause>()) {
6562 AO = llvm::AtomicOrdering::Release;
6563 MemOrderingSpecified = true;
6564 } else if (S.getSingleClause<OMPRelaxedClause>()) {
6565 AO = llvm::AtomicOrdering::Monotonic;
6566 MemOrderingSpecified = true;
6567 }
6568 llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered;
6569 OpenMPClauseKind Kind = OMPC_unknown;
6570 for (const OMPClause *C : S.clauses()) {
6571 // Find the first clause (skip the seq_cst|acq_rel|acquire|release|relaxed
6572 // clause, if it is first).
6573 OpenMPClauseKind K = C->getClauseKind();
6574 // TODO: Codegen for the 'weak' clause is not implemented yet; bail out.
6575 if (K == OMPC_weak)
6576 return;
6577 if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire ||
6578 K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint)
6579 continue;
6580 Kind = K;
6581 KindsEncountered.insert(K);
6582 }
6583 // We just need to correct Kind here. No need to set a bool saying it is
6584 // actually compare capture because we can tell from whether V and R are
6585 // nullptr.
6586 if (KindsEncountered.contains(OMPC_compare) &&
6587 KindsEncountered.contains(OMPC_capture))
6588 Kind = OMPC_compare;
6589 if (!MemOrderingSpecified) {
6590 llvm::AtomicOrdering DefaultOrder =
6591 CGM.getOpenMPRuntime().getDefaultMemoryOrdering();
6592 if (DefaultOrder == llvm::AtomicOrdering::Monotonic ||
6593 DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent ||
6594 (DefaultOrder == llvm::AtomicOrdering::AcquireRelease &&
6595 Kind == OMPC_capture)) {
6596 AO = DefaultOrder;
6597 } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) {
6598 if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) {
6599 AO = llvm::AtomicOrdering::Release;
6600 } else if (Kind == OMPC_read) {
6602 AO = llvm::AtomicOrdering::Acquire;
6603 }
6604 }
6605 }
6606
6607 if (KindsEncountered.contains(OMPC_compare) &&
6608 KindsEncountered.contains(OMPC_fail)) {
6609 Kind = OMPC_compare;
6610 const auto *FailClause = S.getSingleClause<OMPFailClause>();
6611 if (FailClause) {
6612 OpenMPClauseKind FailParameter = FailClause->getFailParameter();
6613 if (FailParameter == llvm::omp::OMPC_relaxed)
6614 FailAO = llvm::AtomicOrdering::Monotonic;
6615 else if (FailParameter == llvm::omp::OMPC_acquire)
6616 FailAO = llvm::AtomicOrdering::Acquire;
6617 else if (FailParameter == llvm::omp::OMPC_seq_cst)
6618 FailAO = llvm::AtomicOrdering::SequentiallyConsistent;
6619 }
6620 }
6621
6622 LexicalScope Scope(*this, S.getSourceRange());
  EmitStopPoint(S.getAssociatedStmt());
6624 emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(),
6625 S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(),
6626 S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(),
6627 S.isFailOnly(), S.getBeginLoc());
6628}
6629
6630static void emitCommonOMPTargetDirective(CodeGenFunction &CGF,
6631 const OMPExecutableDirective &S,
6632 const RegionCodeGenTy &CodeGen) {
6633 assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind()));
6634 CodeGenModule &CGM = CGF.CGM;
6635
6636 // On device emit this construct as inlined code.
6637 if (CGM.getLangOpts().OpenMPIsTargetDevice) {
6638 OMPLexicalScope Scope(CGF, S, OMPD_target);
6639 CGM.getOpenMPRuntime().emitInlinedDirective(
6640 CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) {
6641 CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
6642 });
6643 return;
6644 }
6645
6646 auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S);
6647 llvm::Function *Fn = nullptr;
6648 llvm::Constant *FnID = nullptr;
6649
6650 const Expr *IfCond = nullptr;
  // Check for the (at most one) 'if' clause that applies to the target region.
6652 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
6653 if (C->getNameModifier() == OMPD_unknown ||
6654 C->getNameModifier() == OMPD_target) {
6655 IfCond = C->getCondition();
6656 break;
6657 }
6658 }
6659
6660 // Check if we have any device clause associated with the directive.
6661 llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device(
6662 nullptr, OMPC_DEVICE_unknown);
6663 if (auto *C = S.getSingleClause<OMPDeviceClause>())
    Device.setPointerAndInt(C->getDevice(), C->getModifier());
6665
6666 // Check if we have an if clause whose conditional always evaluates to false
6667 // or if we do not have any targets specified. If so the target region is not
6668 // an offload entry point.
6669 bool IsOffloadEntry = true;
6670 if (IfCond) {
6671 bool Val;
    if (CGF.ConstantFoldsToSimpleInteger(IfCond, Val) && !Val)
6673 IsOffloadEntry = false;
6674 }
6675 if (CGM.getLangOpts().OMPTargetTriples.empty())
6676 IsOffloadEntry = false;
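
  // E.g., '#pragma omp target if(0)' constant-folds the condition to false,
  // so no offload entry is generated and the region is emitted for host
  // execution only.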
6677
6678 if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) {
6679 unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error,
        "No offloading entry generated while offloading is mandatory.");
6682 CGM.getDiags().Report(DiagID);
6683 }
6684
6685 assert(CGF.CurFuncDecl && "No parent declaration for target region!");
6686 StringRef ParentName;
6687 // In case we have Ctors/Dtors we use the complete type variant to produce
6688 // the mangling of the device outlined kernel.
  if (const auto *D = dyn_cast<CXXConstructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Ctor_Complete));
  else if (const auto *D = dyn_cast<CXXDestructorDecl>(CGF.CurFuncDecl))
    ParentName = CGM.getMangledName(GlobalDecl(D, Dtor_Complete));
  else
    ParentName =
        CGM.getMangledName(GlobalDecl(cast<FunctionDecl>(CGF.CurFuncDecl)));
6696
6697 // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
                                                    IsOffloadEntry, CodeGen);
6700 OMPLexicalScope Scope(CGF, S, OMPD_task);
6701 auto &&SizeEmitter =
6702 [IsOffloadEntry](CodeGenFunction &CGF,
6703 const OMPLoopDirective &D) -> llvm::Value * {
6704 if (IsOffloadEntry) {
6705 OMPLoopScope(CGF, D);
6706 // Emit calculation of the iterations count.
      llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
      NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
                                                /*isSigned=*/false);
6710 return NumIterations;
6711 }
6712 return nullptr;
6713 };
  CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device,
                                        SizeEmitter);
6716}
6717
6718static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S,
6719 PrePostActionTy &Action) {
6720 Action.Enter(CGF);
6721 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6722 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6723 CGF.EmitOMPPrivateClause(S, PrivateScope);
6724 (void)PrivateScope.Privatize();
6725 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6726 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
6727
6728 CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt());
6729 CGF.EnsureInsertPoint();
6730}
6731
6732void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM,
6733 StringRef ParentName,
6734 const OMPTargetDirective &S) {
6735 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6736 emitTargetRegion(CGF, S, Action);
6737 };
6738 llvm::Function *Fn;
6739 llvm::Constant *Addr;
6740 // Emit target region as a standalone region.
6741 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6742 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6743 assert(Fn && Addr && "Target device function emission failed.");
6744}
6745
6746void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) {
6747 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6748 emitTargetRegion(CGF, S, Action);
6749 };
6750 emitCommonOMPTargetDirective(*this, S, CodeGen);
6751}
6752
6753static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF,
6754 const OMPExecutableDirective &S,
6755 OpenMPDirectiveKind InnermostKind,
6756 const RegionCodeGenTy &CodeGen) {
6757 const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams);
6758 llvm::Function *OutlinedFn =
6759 CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction(
6760 CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind,
6761 CodeGen);
6762
6763 const auto *NT = S.getSingleClause<OMPNumTeamsClause>();
6764 const auto *TL = S.getSingleClause<OMPThreadLimitClause>();
6765 if (NT || TL) {
6766 const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr;
6767 const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr;
6768
    CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit,
                                                  S.getBeginLoc());
6771 }
6772
6773 OMPTeamsScope Scope(CGF, S);
6774 llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
  CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, S, S.getBeginLoc(), OutlinedFn,
                                           CapturedVars);
6777 CapturedVars);
6778}
6779
6780void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) {
6781 // Emit teams region as a standalone region.
6782 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6783 Action.Enter(CGF);
6784 OMPPrivateScope PrivateScope(CGF);
6785 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6786 CGF.EmitOMPPrivateClause(S, PrivateScope);
6787 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6788 (void)PrivateScope.Privatize();
6789 CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt());
6790 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6791 };
6792 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6793 emitPostUpdateForReductionClause(*this, S,
6794 [](CodeGenFunction &) { return nullptr; });
6795}
6796
6797static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6798 const OMPTargetTeamsDirective &S) {
6799 auto *CS = S.getCapturedStmt(OMPD_teams);
6800 Action.Enter(CGF);
6801 // Emit teams region as a standalone region.
6802 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
6803 Action.Enter(CGF);
6804 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6805 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
6806 CGF.EmitOMPPrivateClause(S, PrivateScope);
6807 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6808 (void)PrivateScope.Privatize();
6809 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
6810 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
    CGF.EmitStmt(CS->getCapturedStmt());
6812 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6813 };
6814 emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen);
6815 emitPostUpdateForReductionClause(CGF, S,
6816 [](CodeGenFunction &) { return nullptr; });
6817}
6818
6819void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
6820 CodeGenModule &CGM, StringRef ParentName,
6821 const OMPTargetTeamsDirective &S) {
6822 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6823 emitTargetTeamsRegion(CGF, Action, S);
6824 };
6825 llvm::Function *Fn;
6826 llvm::Constant *Addr;
6827 // Emit target region as a standalone region.
6828 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6829 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6830 assert(Fn && Addr && "Target device function emission failed.");
6831}
6832
6833void CodeGenFunction::EmitOMPTargetTeamsDirective(
6834 const OMPTargetTeamsDirective &S) {
6835 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6836 emitTargetTeamsRegion(CGF, Action, S);
6837 };
6838 emitCommonOMPTargetDirective(*this, S, CodeGen);
6839}
6840
6841static void
6842emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action,
6843 const OMPTargetTeamsDistributeDirective &S) {
6844 Action.Enter(CGF);
6845 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6847 };
6848
6849 // Emit teams region as a standalone region.
6850 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6851 PrePostActionTy &Action) {
6852 Action.Enter(CGF);
6853 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6854 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6855 (void)PrivateScope.Privatize();
6856 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6857 CodeGenDistribute);
6858 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6859 };
6860 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
6861 emitPostUpdateForReductionClause(CGF, S,
6862 [](CodeGenFunction &) { return nullptr; });
6863}
6864
6865void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
6866 CodeGenModule &CGM, StringRef ParentName,
6867 const OMPTargetTeamsDistributeDirective &S) {
6868 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6869 emitTargetTeamsDistributeRegion(CGF, Action, S);
6870 };
6871 llvm::Function *Fn;
6872 llvm::Constant *Addr;
6873 // Emit target region as a standalone region.
6874 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6875 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6876 assert(Fn && Addr && "Target device function emission failed.");
6877}
6878
6879void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective(
6880 const OMPTargetTeamsDistributeDirective &S) {
6881 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6882 emitTargetTeamsDistributeRegion(CGF, Action, S);
6883 };
6884 emitCommonOMPTargetDirective(*this, S, CodeGen);
6885}
6886
6887static void emitTargetTeamsDistributeSimdRegion(
6888 CodeGenFunction &CGF, PrePostActionTy &Action,
6889 const OMPTargetTeamsDistributeSimdDirective &S) {
6890 Action.Enter(CGF);
6891 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6893 };
6894
6895 // Emit teams region as a standalone region.
6896 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6897 PrePostActionTy &Action) {
6898 Action.Enter(CGF);
6899 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
6900 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6901 (void)PrivateScope.Privatize();
6902 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6903 CodeGenDistribute);
6904 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6905 };
6906 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen);
6907 emitPostUpdateForReductionClause(CGF, S,
6908 [](CodeGenFunction &) { return nullptr; });
6909}
6910
6911void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
6912 CodeGenModule &CGM, StringRef ParentName,
6913 const OMPTargetTeamsDistributeSimdDirective &S) {
6914 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6915 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6916 };
6917 llvm::Function *Fn;
6918 llvm::Constant *Addr;
6919 // Emit target region as a standalone region.
6920 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
6921 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
6922 assert(Fn && Addr && "Target device function emission failed.");
6923}
6924
6925void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
6926 const OMPTargetTeamsDistributeSimdDirective &S) {
6927 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
6928 emitTargetTeamsDistributeSimdRegion(CGF, Action, S);
6929 };
6930 emitCommonOMPTargetDirective(*this, S, CodeGen);
6931}
6932
6933void CodeGenFunction::EmitOMPTeamsDistributeDirective(
6934 const OMPTeamsDistributeDirective &S) {
6935
6936 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6938 };
6939
6940 // Emit teams region as a standalone region.
6941 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6942 PrePostActionTy &Action) {
6943 Action.Enter(CGF);
6944 OMPPrivateScope PrivateScope(CGF);
6945 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6946 (void)PrivateScope.Privatize();
6947 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6948 CodeGenDistribute);
6949 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6950 };
6951 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
6952 emitPostUpdateForReductionClause(*this, S,
6953 [](CodeGenFunction &) { return nullptr; });
6954}
6955
6956void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective(
6957 const OMPTeamsDistributeSimdDirective &S) {
6958 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
6960 };
6961
6962 // Emit teams region as a standalone region.
6963 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6964 PrePostActionTy &Action) {
6965 Action.Enter(CGF);
6966 OMPPrivateScope PrivateScope(CGF);
6967 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6968 (void)PrivateScope.Privatize();
6969 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd,
6970 CodeGenDistribute);
6971 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6972 };
6973 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen);
6974 emitPostUpdateForReductionClause(*this, S,
6975 [](CodeGenFunction &) { return nullptr; });
6976}
6977
6978void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective(
6979 const OMPTeamsDistributeParallelForDirective &S) {
6980 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
6983 };
6984
6985 // Emit teams region as a standalone region.
6986 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
6987 PrePostActionTy &Action) {
6988 Action.Enter(CGF);
6989 OMPPrivateScope PrivateScope(CGF);
6990 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
6991 (void)PrivateScope.Privatize();
6992 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
6993 CodeGenDistribute);
6994 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
6995 };
6996 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen);
6997 emitPostUpdateForReductionClause(*this, S,
6998 [](CodeGenFunction &) { return nullptr; });
6999}
7000
7001void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective(
7002 const OMPTeamsDistributeParallelForSimdDirective &S) {
7003 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
7006 };
7007
7008 // Emit teams region as a standalone region.
7009 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7010 PrePostActionTy &Action) {
7011 Action.Enter(CGF);
7012 OMPPrivateScope PrivateScope(CGF);
7013 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7014 (void)PrivateScope.Privatize();
7015 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7016 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7017 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7018 };
7019 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd,
7020 CodeGen);
7021 emitPostUpdateForReductionClause(*this, S,
7022 [](CodeGenFunction &) { return nullptr; });
7023}
7024
7025void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) {
7026 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7027 llvm::Value *Device = nullptr;
7028 llvm::Value *NumDependences = nullptr;
7029 llvm::Value *DependenceList = nullptr;
7030
7031 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
    Device = EmitScalarExpr(C->getDevice());
7033
7034 // Build list and emit dependences
7035 OMPTaskDataTy Data;
7036 buildDependences(S, Data);
7037 if (!Data.Dependences.empty()) {
7038 Address DependenciesArray = Address::invalid();
    std::tie(NumDependences, DependenciesArray) =
        CGM.getOpenMPRuntime().emitDependClause(*this, Data.Dependences,
                                                S.getBeginLoc());
    DependenceList = DependenciesArray.emitRawPointer(*this);
7043 }
7044 Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>();
7045
7046 assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() ||
7047 S.getSingleClause<OMPDestroyClause>() ||
7048 S.getSingleClause<OMPUseClause>())) &&
7049 "OMPNowaitClause clause is used separately in OMPInteropDirective.");
7050
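  // Illustrative example: for
  //   #pragma omp interop init(targetsync: obj) device(dev) nowait
  // the loop below emits one createOMPInteropInit call, wiring in Device,
  // the dependence list (if any), and the nowait flag.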
7051 auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>();
7052 if (!ItOMPInitClause.empty()) {
7053 // Look at the multiple init clauses
7054 for (const OMPInitClause *C : ItOMPInitClause) {
7055 llvm::Value *InteropvarPtr =
7056 EmitLValue(C->getInteropVar()).getPointer(*this);
7057 llvm::omp::OMPInteropType InteropType =
7058 llvm::omp::OMPInteropType::Unknown;
7059 if (C->getIsTarget()) {
7060 InteropType = llvm::omp::OMPInteropType::Target;
7061 } else {
7062 assert(C->getIsTargetSync() &&
7063 "Expected interop-type target/targetsync");
7064 InteropType = llvm::omp::OMPInteropType::TargetSync;
7065 }
7066 OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType,
7067 Device, NumDependences, DependenceList,
7068 Data.HasNowaitClause);
7069 }
7070 }
7071 auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>();
7072 if (!ItOMPDestroyClause.empty()) {
7073 // Look at the multiple destroy clauses
7074 for (const OMPDestroyClause *C : ItOMPDestroyClause) {
7075 llvm::Value *InteropvarPtr =
7076 EmitLValue(C->getInteropVar()).getPointer(*this);
7077 OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device,
7078 NumDependences, DependenceList,
7079 Data.HasNowaitClause);
7080 }
7081 }
7082 auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>();
7083 if (!ItOMPUseClause.empty()) {
7084 // Look at the multiple use clauses
7085 for (const OMPUseClause *C : ItOMPUseClause) {
7086 llvm::Value *InteropvarPtr =
7087 EmitLValue(C->getInteropVar()).getPointer(*this);
7088 OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device,
7089 NumDependences, DependenceList,
7090 Data.HasNowaitClause);
7091 }
7092 }
7093}
7094
7095static void emitTargetTeamsDistributeParallelForRegion(
7096 CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S,
7097 PrePostActionTy &Action) {
7098 Action.Enter(CGF);
7099 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
7102 };
7103
7104 // Emit teams region as a standalone region.
7105 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7106 PrePostActionTy &Action) {
7107 Action.Enter(CGF);
7108 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7109 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7110 (void)PrivateScope.Privatize();
7111 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7112 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7113 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7114 };
7115
7116 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
7117 CodeGenTeams);
7118 emitPostUpdateForReductionClause(CGF, S,
7119 [](CodeGenFunction &) { return nullptr; });
7120}
7121
7122void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
7123 CodeGenModule &CGM, StringRef ParentName,
7124 const OMPTargetTeamsDistributeParallelForDirective &S) {
7125 // Emit SPMD target teams distribute parallel for region as a standalone
7126 // region.
7127 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7128 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7129 };
7130 llvm::Function *Fn;
7131 llvm::Constant *Addr;
7132 // Emit target region as a standalone region.
7133 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7134 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7135 assert(Fn && Addr && "Target device function emission failed.");
7136}
7137
7138void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective(
7139 const OMPTargetTeamsDistributeParallelForDirective &S) {
7140 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7141 emitTargetTeamsDistributeParallelForRegion(CGF, S, Action);
7142 };
7143 emitCommonOMPTargetDirective(*this, S, CodeGen);
7144}
7145
7146static void emitTargetTeamsDistributeParallelForSimdRegion(
7147 CodeGenFunction &CGF,
7148 const OMPTargetTeamsDistributeParallelForSimdDirective &S,
7149 PrePostActionTy &Action) {
7150 Action.Enter(CGF);
7151 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
7154 };
7155
7156 // Emit teams region as a standalone region.
7157 auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7158 PrePostActionTy &Action) {
7159 Action.Enter(CGF);
7160 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7161 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7162 (void)PrivateScope.Privatize();
7163 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
7164 CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
7165 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7166 };
7167
7168 emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd,
7169 CodeGenTeams);
7170 emitPostUpdateForReductionClause(CGF, S,
7171 [](CodeGenFunction &) { return nullptr; });
7172}
7173
7174void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
7175 CodeGenModule &CGM, StringRef ParentName,
7176 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7177 // Emit SPMD target teams distribute parallel for simd region as a standalone
7178 // region.
7179 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7180 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7181 };
7182 llvm::Function *Fn;
7183 llvm::Constant *Addr;
7184 // Emit target region as a standalone region.
7185 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7186 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7187 assert(Fn && Addr && "Target device function emission failed.");
7188}
7189
7190void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective(
7191 const OMPTargetTeamsDistributeParallelForSimdDirective &S) {
7192 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7193 emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action);
7194 };
7195 emitCommonOMPTargetDirective(*this, S, CodeGen);
7196}
7197
7198void CodeGenFunction::EmitOMPCancellationPointDirective(
7199 const OMPCancellationPointDirective &S) {
7200 CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(),
7201 S.getCancelRegion());
7202}
7203
7204void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) {
7205 const Expr *IfCond = nullptr;
7206 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7207 if (C->getNameModifier() == OMPD_unknown ||
7208 C->getNameModifier() == OMPD_cancel) {
7209 IfCond = C->getCondition();
7210 break;
7211 }
7212 }
7213 if (CGM.getLangOpts().OpenMPIRBuilder) {
7214 llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
7215 // TODO: This check is necessary as we only generate `omp parallel` through
7216 // the OpenMPIRBuilder for now.
7217 if (S.getCancelRegion() == OMPD_parallel ||
7218 S.getCancelRegion() == OMPD_sections ||
7219 S.getCancelRegion() == OMPD_section) {
7220 llvm::Value *IfCondition = nullptr;
7221 if (IfCond)
        IfCondition = EmitScalarExpr(IfCond, /*IgnoreResultAssign=*/true);
      return Builder.restoreIP(
          OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion()));
7226 }
7227 }
7228
7229 CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond,
7230 S.getCancelRegion());
7231}
7232
7233CodeGenFunction::JumpDest
7234CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) {
7235 if (Kind == OMPD_parallel || Kind == OMPD_task ||
7236 Kind == OMPD_target_parallel || Kind == OMPD_taskloop ||
7237 Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop)
7238 return ReturnBlock;
7239 assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections ||
7240 Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for ||
7241 Kind == OMPD_distribute_parallel_for ||
7242 Kind == OMPD_target_parallel_for ||
7243 Kind == OMPD_teams_distribute_parallel_for ||
7244 Kind == OMPD_target_teams_distribute_parallel_for);
7245 return OMPCancelStack.getExitBlock();
7246}
7247
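// Illustrative example of what the privatization below implements: given
//   int *p = ...;
//   #pragma omp target data map(tofrom: p[0:n]) use_device_ptr(p)
//   foo(p);
// the region body must see 'p' as the device pointer that the runtime
// returned for the mapped storage, not as the original host pointer.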
7248void CodeGenFunction::EmitOMPUseDevicePtrClause(
7249 const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope,
7250 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7251 CaptureDeviceAddrMap) {
7252 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7253 for (const Expr *OrigVarIt : C.varlists()) {
7254 const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl());
7255 if (!Processed.insert(OrigVD).second)
7256 continue;
7257
7258 // In order to identify the right initializer we need to match the
7259 // declaration used by the mapping logic. In some cases we may get
7260 // OMPCapturedExprDecl that refers to the original declaration.
7261 const ValueDecl *MatchingVD = OrigVD;
7262 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
7265 const auto *ME = cast<MemberExpr>(OED->getInit());
7266 assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) &&
7267 "Base should be the current struct!");
7268 MatchingVD = ME->getMemberDecl();
7269 }
7270
7271 // If we don't have information about the current list item, move on to
7272 // the next one.
7273 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7274 if (InitAddrIt == CaptureDeviceAddrMap.end())
7275 continue;
7276
7277 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7278
7279 // Return the address of the private variable.
7280 bool IsRegistered = PrivateScope.addPrivate(
7281 OrigVD,
7282 Address(InitAddrIt->second, Ty,
7283 getContext().getTypeAlignInChars(getContext().VoidPtrTy)));
7284 assert(IsRegistered && "firstprivate var already registered as private");
7285 // Silence the warning about unused variable.
7286 (void)IsRegistered;
7287 }
7288}
7289
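/// Strip array sections and array subscripts to reach the base variable
/// declaration, e.g. the 'a' in 'use_device_addr(a[0:n])'.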
7290static const VarDecl *getBaseDecl(const Expr *Ref) {
7291 const Expr *Base = Ref->IgnoreParenImpCasts();
  while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Base))
    Base = OASE->getBase()->IgnoreParenImpCasts();
  while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Base))
    Base = ASE->getBase()->IgnoreParenImpCasts();
  return cast<VarDecl>(cast<DeclRefExpr>(Base)->getDecl());
7297}
7298
7299void CodeGenFunction::EmitOMPUseDeviceAddrClause(
7300 const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope,
7301 const llvm::DenseMap<const ValueDecl *, llvm::Value *>
7302 CaptureDeviceAddrMap) {
7303 llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
7304 for (const Expr *Ref : C.varlists()) {
7305 const VarDecl *OrigVD = getBaseDecl(Ref);
7306 if (!Processed.insert(OrigVD).second)
7307 continue;
7308 // In order to identify the right initializer we need to match the
7309 // declaration used by the mapping logic. In some cases we may get
7310 // OMPCapturedExprDecl that refers to the original declaration.
7311 const ValueDecl *MatchingVD = OrigVD;
7312 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) {
      // OMPCapturedExprDecls are used to privatize fields of the current
      // structure.
7315 const auto *ME = cast<MemberExpr>(OED->getInit());
7316 assert(isa<CXXThisExpr>(ME->getBase()) &&
7317 "Base should be the current struct!");
7318 MatchingVD = ME->getMemberDecl();
7319 }
7320
7321 // If we don't have information about the current list item, move on to
7322 // the next one.
7323 auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD);
7324 if (InitAddrIt == CaptureDeviceAddrMap.end())
7325 continue;
7326
7327 llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType());
7328
7329 Address PrivAddr =
7330 Address(InitAddrIt->second, Ty,
7331 getContext().getTypeAlignInChars(getContext().VoidPtrTy));
    // For declrefs and variable-length arrays we need to load the pointer to
    // get the correct mapping, since the pointer to the data (not the data
    // itself) was passed to the runtime.
7334 if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) ||
7335 MatchingVD->getType()->isArrayType()) {
7336 QualType PtrTy = getContext().getPointerType(
7337 OrigVD->getType().getNonReferenceType());
7338 PrivAddr =
7339 EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)),
7340 PtrTy->castAs<PointerType>());
7341 }
7342
7343 (void)PrivateScope.addPrivate(OrigVD, PrivAddr);
7344 }
7345}
7346
7347// Generate the instructions for '#pragma omp target data' directive.
7348void CodeGenFunction::EmitOMPTargetDataDirective(
7349 const OMPTargetDataDirective &S) {
7350 CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true,
7351 /*SeparateBeginEndCalls=*/true);
7352
7353 // Create a pre/post action to signal the privatization of the device pointer.
7354 // This action can be replaced by the OpenMP runtime code generation to
7355 // deactivate privatization.
7356 bool PrivatizeDevicePointers = false;
7357 class DevicePointerPrivActionTy : public PrePostActionTy {
7358 bool &PrivatizeDevicePointers;
7359
7360 public:
7361 explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers)
7362 : PrivatizeDevicePointers(PrivatizeDevicePointers) {}
7363 void Enter(CodeGenFunction &CGF) override {
7364 PrivatizeDevicePointers = true;
7365 }
7366 };
7367 DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers);
7368
7369 auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7370 auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
      CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt());
7372 };
7373
7374 // Codegen that selects whether to generate the privatization code or not.
7375 auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) {
7376 RegionCodeGenTy RCG(InnermostCodeGen);
7377 PrivatizeDevicePointers = false;
7378
7379 // Call the pre-action to change the status of PrivatizeDevicePointers if
7380 // needed.
7381 Action.Enter(CGF);
7382
7383 if (PrivatizeDevicePointers) {
7384 OMPPrivateScope PrivateScope(CGF);
7385 // Emit all instances of the use_device_ptr clause.
7386 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7387 CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope,
7388 Info.CaptureDeviceAddrMap);
7389 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7390 CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope,
7391 Info.CaptureDeviceAddrMap);
7392 (void)PrivateScope.Privatize();
7393 RCG(CGF);
7394 } else {
7395 // If we don't have target devices, don't bother emitting the data
7396 // mapping code.
7397 std::optional<OpenMPDirectiveKind> CaptureRegion;
7398 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7399 // Emit helper decls of the use_device_ptr/use_device_addr clauses.
7400 for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>())
7401 for (const Expr *E : C->varlists()) {
7402 const Decl *D = cast<DeclRefExpr>(E)->getDecl();
7403 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7404 CGF.EmitVarDecl(*OED);
7405 }
7406 for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>())
7407 for (const Expr *E : C->varlists()) {
7408 const Decl *D = getBaseDecl(E);
7409 if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D))
7410 CGF.EmitVarDecl(*OED);
7411 }
7412 } else {
7413 CaptureRegion = OMPD_unknown;
7414 }
7415
7416 OMPLexicalScope Scope(CGF, S, CaptureRegion);
7417 RCG(CGF);
7418 }
7419 };
7420
7421 // Forward the provided action to the privatization codegen.
7422 RegionCodeGenTy PrivRCG(PrivCodeGen);
7423 PrivRCG.setAction(Action);
7424
    // Although the body of the region is emitted as an inlined directive, we
    // don't use an inline scope: changes to the references inside the region
    // are expected to be visible outside, so we do not privatize them.
7428 OMPLexicalScope Scope(CGF, S);
7429 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data,
7430 PrivRCG);
7431 };
7432
7433 RegionCodeGenTy RCG(CodeGen);
7434
7435 // If we don't have target devices, don't bother emitting the data mapping
7436 // code.
7437 if (CGM.getLangOpts().OMPTargetTriples.empty()) {
7438 RCG(*this);
7439 return;
7440 }
7441
7442 // Check if we have any if clause associated with the directive.
7443 const Expr *IfCond = nullptr;
7444 if (const auto *C = S.getSingleClause<OMPIfClause>())
7445 IfCond = C->getCondition();
7446
7447 // Check if we have any device clause associated with the directive.
7448 const Expr *Device = nullptr;
7449 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7450 Device = C->getDevice();
7451
7452 // Set the action to signal privatization of device pointers.
7453 RCG.setAction(PrivAction);
7454
7455 // Emit region code.
7456 CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG,
7457 Info);
7458}
7459
7460void CodeGenFunction::EmitOMPTargetEnterDataDirective(
7461 const OMPTargetEnterDataDirective &S) {
7462 // If we don't have target devices, don't bother emitting the data mapping
7463 // code.
7464 if (CGM.getLangOpts().OMPTargetTriples.empty())
7465 return;
7466
7467 // Check if we have any if clause associated with the directive.
7468 const Expr *IfCond = nullptr;
7469 if (const auto *C = S.getSingleClause<OMPIfClause>())
7470 IfCond = C->getCondition();
7471
7472 // Check if we have any device clause associated with the directive.
7473 const Expr *Device = nullptr;
7474 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7475 Device = C->getDevice();
7476
7477 OMPLexicalScope Scope(*this, S, OMPD_task);
7478 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7479}
7480
7481void CodeGenFunction::EmitOMPTargetExitDataDirective(
7482 const OMPTargetExitDataDirective &S) {
7483 // If we don't have target devices, don't bother emitting the data mapping
7484 // code.
7485 if (CGM.getLangOpts().OMPTargetTriples.empty())
7486 return;
7487
7488 // Check if we have any if clause associated with the directive.
7489 const Expr *IfCond = nullptr;
7490 if (const auto *C = S.getSingleClause<OMPIfClause>())
7491 IfCond = C->getCondition();
7492
7493 // Check if we have any device clause associated with the directive.
7494 const Expr *Device = nullptr;
7495 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7496 Device = C->getDevice();
7497
7498 OMPLexicalScope Scope(*this, S, OMPD_task);
7499 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7500}
7501
7502static void emitTargetParallelRegion(CodeGenFunction &CGF,
7503 const OMPTargetParallelDirective &S,
7504 PrePostActionTy &Action) {
7505 // Get the captured statement associated with the 'parallel' region.
7506 const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
7507 Action.Enter(CGF);
7508 auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) {
7509 Action.Enter(CGF);
7510 CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
7511 (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope);
7512 CGF.EmitOMPPrivateClause(S, PrivateScope);
7513 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7514 (void)PrivateScope.Privatize();
7515 if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()))
7516 CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S);
7517 // TODO: Add support for clauses.
    CGF.EmitStmt(CS->getCapturedStmt());
7519 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
7520 };
7521 emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
7522 emitEmptyBoundParameters);
7523 emitPostUpdateForReductionClause(CGF, S,
7524 [](CodeGenFunction &) { return nullptr; });
7525}
7526
7527void CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
7528 CodeGenModule &CGM, StringRef ParentName,
7529 const OMPTargetParallelDirective &S) {
7530 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7531 emitTargetParallelRegion(CGF, S, Action);
7532 };
7533 llvm::Function *Fn;
7534 llvm::Constant *Addr;
7535 // Emit target region as a standalone region.
7536 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7537 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7538 assert(Fn && Addr && "Target device function emission failed.");
7539}
7540
7541void CodeGenFunction::EmitOMPTargetParallelDirective(
7542 const OMPTargetParallelDirective &S) {
7543 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7544 emitTargetParallelRegion(CGF, S, Action);
7545 };
7546 emitCommonOMPTargetDirective(*this, S, CodeGen);
7547}
7548
7549static void emitTargetParallelForRegion(CodeGenFunction &CGF,
7550 const OMPTargetParallelForDirective &S,
7551 PrePostActionTy &Action) {
7552 Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
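  // Illustrative example:
  //   #pragma omp target parallel for
  //   for (int i = 0; i < n; ++i) a[i] = b[i];
  // is emitted as a 'parallel' region whose body runs the worksharing loop,
  // all inside the target outlined function.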
7555 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7556 Action.Enter(CGF);
7557 CodeGenFunction::OMPCancelStackRAII CancelRegion(
7558 CGF, OMPD_target_parallel_for, S.hasCancel());
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
7561 };
7562 emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
7563 emitEmptyBoundParameters);
7564}
7565
7566void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
7567 CodeGenModule &CGM, StringRef ParentName,
7568 const OMPTargetParallelForDirective &S) {
7569 // Emit SPMD target parallel for region as a standalone region.
7570 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7571 emitTargetParallelForRegion(CGF, S, Action);
7572 };
7573 llvm::Function *Fn;
7574 llvm::Constant *Addr;
7575 // Emit target region as a standalone region.
7576 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7577 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7578 assert(Fn && Addr && "Target device function emission failed.");
7579}
7580
7581void CodeGenFunction::EmitOMPTargetParallelForDirective(
7582 const OMPTargetParallelForDirective &S) {
7583 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7584 emitTargetParallelForRegion(CGF, S, Action);
7585 };
7586 emitCommonOMPTargetDirective(*this, S, CodeGen);
7587}
7588
7589static void
7590emitTargetParallelForSimdRegion(CodeGenFunction &CGF,
7591 const OMPTargetParallelForSimdDirective &S,
7592 PrePostActionTy &Action) {
7593 Action.Enter(CGF);
  // Emit the directive as a combined directive consisting of two implicit
  // directives: 'parallel' and 'for'.
7596 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7597 Action.Enter(CGF);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
7600 };
7601 emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen,
7602 emitEmptyBoundParameters);
7603}
7604
7605void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
7606 CodeGenModule &CGM, StringRef ParentName,
7607 const OMPTargetParallelForSimdDirective &S) {
7608 // Emit SPMD target parallel for region as a standalone region.
7609 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7610 emitTargetParallelForSimdRegion(CGF, S, Action);
7611 };
7612 llvm::Function *Fn;
7613 llvm::Constant *Addr;
7614 // Emit target region as a standalone region.
7615 CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
7616 S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
7617 assert(Fn && Addr && "Target device function emission failed.");
7618}
7619
7620void CodeGenFunction::EmitOMPTargetParallelForSimdDirective(
7621 const OMPTargetParallelForSimdDirective &S) {
7622 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7623 emitTargetParallelForSimdRegion(CGF, S, Action);
7624 };
7625 emitCommonOMPTargetDirective(*this, S, CodeGen);
7626}
7627
7628/// Emit a helper variable and return corresponding lvalue.
7629static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper,
7630 const ImplicitParamDecl *PVD,
7631 CodeGenFunction::OMPPrivateScope &Privates) {
  const auto *VDecl = cast<VarDecl>(Helper->getDecl());
  Privates.addPrivate(VDecl, CGF.GetAddrOfLocalVar(PVD));
7634}
7635
7636void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) {
7637 assert(isOpenMPTaskLoopDirective(S.getDirectiveKind()));
7638 // Emit outlined function for task construct.
7639 const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop);
7640 Address CapturedStruct = Address::invalid();
7641 {
7642 OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
    CapturedStruct = GenerateCapturedStmtArgument(*CS);
7644 }
  QualType SharedsTy = getContext().getRecordType(CS->getCapturedRecordDecl());
7646 const Expr *IfCond = nullptr;
7647 for (const auto *C : S.getClausesOfKind<OMPIfClause>()) {
7648 if (C->getNameModifier() == OMPD_unknown ||
7649 C->getNameModifier() == OMPD_taskloop) {
7650 IfCond = C->getCondition();
7651 break;
7652 }
7653 }
7654
7655 OMPTaskDataTy Data;
7656 // Check if taskloop must be emitted without taskgroup.
7657 Data.Nogroup = S.getSingleClause<OMPNogroupClause>();
7658 // TODO: Check if we should emit tied or untied task.
7659 Data.Tied = true;
7660 // Set scheduling for taskloop
7661 if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) {
7662 // grainsize clause
7663 Data.Schedule.setInt(/*IntVal=*/false);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getGrainsize()));
7665 } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) {
7666 // num_tasks clause
7667 Data.Schedule.setInt(/*IntVal=*/true);
    Data.Schedule.setPointer(EmitScalarExpr(Clause->getNumTasks()));
7669 }
7670
7671 auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) {
7672 // if (PreCond) {
7673 // for (IV in 0..LastIteration) BODY;
7674 // <Final counter/linear vars updates>;
7675 // }
7676 //
7677
7678 // Emit: if (PreCond) - begin.
7679 // If the condition constant folds and can be elided, avoid emitting the
7680 // whole loop.
7681 bool CondConstant;
7682 llvm::BasicBlock *ContBlock = nullptr;
7683 OMPLoopScope PreInitScope(CGF, S);
    if (CGF.ConstantFoldsToSimpleInteger(S.getPreCond(), CondConstant)) {
7685 if (!CondConstant)
7686 return;
7687 } else {
      llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("taskloop.if.then");
      ContBlock = CGF.createBasicBlock("taskloop.if.end");
      emitPreCond(CGF, S, S.getPreCond(), ThenBlock, ContBlock,
                  CGF.getProfileCount(&S));
      CGF.EmitBlock(ThenBlock);
7693 CGF.incrementProfileCounter(&S);
7694 }
7695
    (void)CGF.EmitOMPLinearClauseInit(S);
7697
7698 OMPPrivateScope LoopScope(CGF);
7699 // Emit helper vars inits.
7700 enum { LowerBound = 5, UpperBound, Stride, LastIter };
7701 auto *I = CS->getCapturedDecl()->param_begin();
    auto *LBP = std::next(I, LowerBound);
    auto *UBP = std::next(I, UpperBound);
    auto *STP = std::next(I, Stride);
    auto *LIP = std::next(I, LastIter);
    mapParam(CGF, cast<DeclRefExpr>(S.getLowerBoundVariable()), *LBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getUpperBoundVariable()), *UBP,
             LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getStrideVariable()), *STP, LoopScope);
    mapParam(CGF, cast<DeclRefExpr>(S.getIsLastIterVariable()), *LIP,
             LoopScope);
7713 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
    CGF.EmitOMPLinearClause(S, LoopScope);
7715 bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope);
7716 (void)LoopScope.Privatize();
7717 // Emit the loop iteration variable.
7718 const Expr *IVExpr = S.getIterationVariable();
    const auto *IVDecl = cast<VarDecl>(cast<DeclRefExpr>(IVExpr)->getDecl());
    CGF.EmitVarDecl(*IVDecl);
    CGF.EmitIgnoredExpr(S.getInit());
7722
7723 // Emit the iterations count variable.
7724 // If it is not a variable, Sema decided to calculate iterations count on
7725 // each iteration (e.g., it is foldable into a constant).
    if (const auto *LIExpr = dyn_cast<DeclRefExpr>(S.getLastIteration())) {
      CGF.EmitVarDecl(*cast<VarDecl>(LIExpr->getDecl()));
      // Emit calculation of the iterations count.
      CGF.EmitIgnoredExpr(S.getCalcLastIteration());
7730 }
7731
7732 {
7733 OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false);
7734 emitCommonSimdLoop(
7735 CGF, S,
          [&S](CodeGenFunction &CGF, PrePostActionTy &) {
            if (isOpenMPSimdDirective(S.getDirectiveKind()))
              CGF.EmitOMPSimdInit(S);
          },
          [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) {
            CGF.EmitOMPInnerLoop(
                S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
                [&S](CodeGenFunction &CGF) {
                  emitOMPLoopBodyWithStopPoint(CGF, S,
                                               CodeGenFunction::JumpDest());
7746 },
7747 [](CodeGenFunction &) {});
7748 });
7749 }
7750 // Emit: if (PreCond) - end.
7751 if (ContBlock) {
      CGF.EmitBranch(ContBlock);
      CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
7754 }
7755 // Emit final copy of the lastprivate variables if IsLastIter != 0.
7756 if (HasLastprivateClause) {
7757 CGF.EmitOMPLastprivateClauseFinal(
7758 S, isOpenMPSimdDirective(S.getDirectiveKind()),
7759 CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar(
7760 CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7761 (*LIP)->getType(), S.getBeginLoc())));
7762 }
7763 LoopScope.restoreMap();
7764 CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) {
7765 return CGF.Builder.CreateIsNotNull(
7766 CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false,
7767 (*LIP)->getType(), S.getBeginLoc()));
7768 });
7769 };
7770 auto &&TaskGen = [&S, SharedsTy, CapturedStruct,
7771 IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn,
7772 const OMPTaskDataTy &Data) {
7773 auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond,
7774 &Data](CodeGenFunction &CGF, PrePostActionTy &) {
7775 OMPLoopScope PreInitScope(CGF, S);
      CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, S.getBeginLoc(), S,
                                                  OutlinedFn, SharedsTy,
                                                  CapturedStruct, IfCond, Data);
7779 };
7780 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop,
7781 CodeGen);
7782 };
7783 if (Data.Nogroup) {
7784 EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data);
7785 } else {
7786 CGM.getOpenMPRuntime().emitTaskgroupRegion(
      *this,
      [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF,
                                      PrePostActionTy &Action) {
        Action.Enter(CGF);
        CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen,
                                      Data);
      },
      S.getBeginLoc());
7795 }
7796}
7797
7798void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) {
7799 auto LPCRegion =
7800 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7801 EmitOMPTaskLoopBasedDirective(S);
7802}
7803
7804void CodeGenFunction::EmitOMPTaskLoopSimdDirective(
7805 const OMPTaskLoopSimdDirective &S) {
7806 auto LPCRegion =
7807 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7808 OMPLexicalScope Scope(*this, S);
7809 EmitOMPTaskLoopBasedDirective(S);
7810}
7811
7812void CodeGenFunction::EmitOMPMasterTaskLoopDirective(
7813 const OMPMasterTaskLoopDirective &S) {
7814 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7815 Action.Enter(CGF);
7816 EmitOMPTaskLoopBasedDirective(S);
7817 };
7818 auto LPCRegion =
7819 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7820 OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7822}
7823
7824void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective(
7825 const OMPMasterTaskLoopSimdDirective &S) {
7826 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7827 Action.Enter(CGF);
7828 EmitOMPTaskLoopBasedDirective(S);
7829 };
7830 auto LPCRegion =
7831 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7832 OMPLexicalScope Scope(*this, S);
  CGM.getOpenMPRuntime().emitMasterRegion(*this, CodeGen, S.getBeginLoc());
7834}
7835
7836void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective(
7837 const OMPParallelMasterTaskLoopDirective &S) {
7838 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7839 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7840 PrePostActionTy &Action) {
7841 Action.Enter(CGF);
7842 CGF.EmitOMPTaskLoopBasedDirective(S);
7843 };
7844 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
7847 };
7848 auto LPCRegion =
7849 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7850 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen,
7851 emitEmptyBoundParameters);
7852}
7853
7854void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective(
7855 const OMPParallelMasterTaskLoopSimdDirective &S) {
7856 auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7857 auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF,
7858 PrePostActionTy &Action) {
7859 Action.Enter(CGF);
7860 CGF.EmitOMPTaskLoopBasedDirective(S);
7861 };
7862 OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false);
    CGM.getOpenMPRuntime().emitMasterRegion(CGF, TaskLoopCodeGen,
                                            S.getBeginLoc());
7865 };
7866 auto LPCRegion =
7867 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7868 emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen,
7869 emitEmptyBoundParameters);
7870}
7871
7872// Generate the instructions for '#pragma omp target update' directive.
7873void CodeGenFunction::EmitOMPTargetUpdateDirective(
7874 const OMPTargetUpdateDirective &S) {
7875 // If we don't have target devices, don't bother emitting the data mapping
7876 // code.
7877 if (CGM.getLangOpts().OMPTargetTriples.empty())
7878 return;
7879
7880 // Check if we have any if clause associated with the directive.
7881 const Expr *IfCond = nullptr;
7882 if (const auto *C = S.getSingleClause<OMPIfClause>())
7883 IfCond = C->getCondition();
7884
7885 // Check if we have any device clause associated with the directive.
7886 const Expr *Device = nullptr;
7887 if (const auto *C = S.getSingleClause<OMPDeviceClause>())
7888 Device = C->getDevice();
7889
7890 OMPLexicalScope Scope(*this, S, OMPD_task);
7891 CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device);
7892}
7893
7894void CodeGenFunction::EmitOMPGenericLoopDirective(
7895 const OMPGenericLoopDirective &S) {
7896 // Unimplemented, just inline the underlying statement for now.
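  // Illustrative example: for '#pragma omp loop' over a canonical for loop,
  // the lambda below privatizes the loop counters (when the loop init is
  // not a declaration) and then emits the statement sequentially.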
7897 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7899 const Stmt *CS =
7900 cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt();
    const auto *ForS = dyn_cast<ForStmt>(CS);
7902 if (ForS && !isa<DeclStmt>(ForS->getInit())) {
7903 OMPPrivateScope LoopScope(CGF);
7904 CGF.EmitOMPPrivateLoopCounters(S, LoopScope);
7905 (void)LoopScope.Privatize();
      CGF.EmitStmt(CS);
7907 LoopScope.restoreMap();
7908 } else {
      CGF.EmitStmt(CS);
7910 }
7911 };
7912 OMPLexicalScope Scope(*this, S, OMPD_unknown);
7913 CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen);
7914}
7915
7916void CodeGenFunction::EmitOMPParallelGenericLoopDirective(
7917 const OMPLoopDirective &S) {
  // Emit the combined directive as if its constituent constructs are
  // 'parallel' and 'for'.
7920 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
7921 Action.Enter(CGF);
7922 emitOMPCopyinClause(CGF, S);
7923 (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false);
7924 };
7925 {
7926 auto LPCRegion =
7927 CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S);
7928 emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
7929 emitEmptyBoundParameters);
7930 }
7931 // Check for outer lastprivate conditional update.
7932 checkForLastprivateConditionalUpdate(*this, S);
7933}
7934
7935void CodeGenFunction::EmitOMPTeamsGenericLoopDirective(
7936 const OMPTeamsGenericLoopDirective &S) {
7937 // To be consistent with current behavior of 'target teams loop', emit
7938 // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'.
7939 auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
7941 };
7942
7943 // Emit teams region as a standalone region.
7944 auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
7945 PrePostActionTy &Action) {
7946 Action.Enter(CGF);
7947 OMPPrivateScope PrivateScope(CGF);
7948 CGF.EmitOMPReductionClauseInit(S, PrivateScope);
7949 (void)PrivateScope.Privatize();
7950 CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute,
7951 CodeGenDistribute);
7952 CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
7953 };
7954 emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen);
7955 emitPostUpdateForReductionClause(*this, S,
7956 [](CodeGenFunction &) { return nullptr; });
7957}
7958
7959#ifndef NDEBUG
7960static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF,
7961 std::string StatusMsg,
7962 const OMPExecutableDirective &D) {
7963 bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice;
7964 if (IsDevice)
7965 StatusMsg += ": DEVICE";
7966 else
7967 StatusMsg += ": HOST";
7968 SourceLocation L = D.getBeginLoc();
7969 auto &SM = CGF.getContext().getSourceManager();
7970 PresumedLoc PLoc = SM.getPresumedLoc(Loc: L);
7971 const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr;
7972 unsigned LineNo =
7973 PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(Loc: L);
7974 llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n";
7975}
7976#endif
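// Note (added for clarity): the helper above fires only in asserts builds,
// via the DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, ...) calls below. A typical way
// to enable it is LLVM's debug-type switch, e.g.
//   clang -mllvm -debug-only=target-teams-loop-codegen ...
// (invocation shown as an example; the exact driver spelling may vary).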

static void emitTargetTeamsGenericLoopRegionAsParallel(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent constructs are 'distribute',
  // 'parallel', and 'for'.
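  // Illustrative example: when the region qualifies (see canBeParallelFor()
  // at the dispatch site below),
  //   #pragma omp target teams loop
  //   for (int I = 0; I < N; ++I) ...
  // is emitted as if it were 'target teams distribute parallel for'.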
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
                              S.getDistInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                                 PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as parallel for", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for,
                              CodeGenTeams);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

static void emitTargetTeamsGenericLoopRegionAsDistribute(
    CodeGenFunction &CGF, PrePostActionTy &Action,
    const OMPTargetTeamsGenericLoopDirective &S) {
  Action.Enter(CGF);
  // Emit 'teams loop' as if its constituent construct is 'distribute'.
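  // Illustrative example: a 'loop' body that could not be proven safe for
  // the 'parallel for' treatment, e.g. one calling an opaque function
  // (hypothetical 'ExternalFn') that might itself contain OpenMP directives:
  //   #pragma omp target teams loop
  //   for (int I = 0; I < N; ++I)
  //     ExternalFn(I); // opaque call; falls back to 'distribute' lowering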
  auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
    CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
  };

  // Emit teams region as a standalone region.
  auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF,
                                            PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    CGF.EmitOMPReductionClauseInit(S, PrivateScope);
    (void)PrivateScope.Privatize();
    CGF.CGM.getOpenMPRuntime().emitInlinedDirective(
        CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false);
    CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams);
  };
  DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE,
                  emitTargetTeamsLoopCodegenStatus(
                      CGF, TTL_CODEGEN_TYPE " as distribute", S));
  emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen);
  emitPostUpdateForReductionClause(CGF, S,
                                   [](CodeGenFunction &) { return nullptr; });
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective(
    const OMPTargetTeamsGenericLoopDirective &S) {
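  // Note (added for clarity): canBeParallelFor() reflects an analysis done
  // earlier, presumably in Sema, of the associated loop nest; when it holds,
  // the more efficient 'parallel for' style lowering is chosen, otherwise we
  // conservatively fall back to plain 'distribute'.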
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetTeamsGenericLoopDirective &S) {
  // Emit SPMD target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    if (S.canBeParallelFor())
      emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S);
    else
      emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr &&
         "Target device function emission failed for 'target teams loop'.");
}

static void emitTargetParallelGenericLoopRegion(
    CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S,
    PrePostActionTy &Action) {
  Action.Enter(CGF);
  // Emit as 'parallel for'.
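  // For example (illustrative only),
  //   #pragma omp target parallel loop
  //   for (int I = 0; I < N; ++I) ...
  // reuses the 'parallel for' worksharing-loop emission, with cancellation
  // explicitly disabled for the region.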
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    CodeGenFunction::OMPCancelStackRAII CancelRegion(
        CGF, OMPD_target_parallel_loop, /*hasCancel=*/false);
    CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
                               emitDispatchForLoopBounds);
  };
  emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen,
                                 emitEmptyBoundParameters);
}

void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
    CodeGenModule &CGM, StringRef ParentName,
    const OMPTargetParallelGenericLoopDirective &S) {
  // Emit target parallel loop region as a standalone region.
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  llvm::Function *Fn;
  llvm::Constant *Addr;
  // Emit target region as a standalone region.
  CGM.getOpenMPRuntime().emitTargetOutlinedFunction(
      S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen);
  assert(Fn && Addr && "Target device function emission failed.");
}

/// Emit combined directive 'target parallel loop' as if its constituent
/// constructs are 'target', 'parallel', and 'for'.
void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective(
    const OMPTargetParallelGenericLoopDirective &S) {
  auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
    emitTargetParallelGenericLoopRegion(CGF, S, Action);
  };
  emitCommonOMPTargetDirective(*this, S, CodeGen);
}

void CodeGenFunction::EmitSimpleOMPExecutableDirective(
    const OMPExecutableDirective &D) {
  if (const auto *SD = dyn_cast<OMPScanDirective>(&D)) {
    EmitOMPScanDirective(*SD);
    return;
  }
  if (!D.hasAssociatedStmt() || !D.getAssociatedStmt())
    return;
  auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) {
    OMPPrivateScope GlobalsScope(CGF);
    if (isOpenMPTaskingDirective(D.getDirectiveKind())) {
      // Capture global firstprivates to avoid crash.
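      // Illustrative example (hypothetical names): for a tasking directive
      // that firstprivatizes a global, e.g.
      //   int G;                       // namespace-scope variable
      //   #pragma omp task firstprivate(G)
      //   Use(G);
      // the global's address is pre-registered here so that privatization
      // finds a mapping for it instead of crashing.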
      for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) {
        for (const Expr *Ref : C->varlists()) {
          // Use dyn_cast: the reference may not be a plain DeclRefExpr, in
          // which case it is skipped.
          const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
          if (!DRE)
            continue;
          const auto *VD = dyn_cast<VarDecl>(DRE->getDecl());
          if (!VD || VD->hasLocalStorage())
            continue;
          if (!CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(Ref);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
        }
      }
    }
    if (isOpenMPSimdDirective(D.getDirectiveKind())) {
      (void)GlobalsScope.Privatize();
      ParentLoopDirectiveForScanRegion ScanRegion(CGF, D);
      emitOMPSimdRegion(CGF, cast<OMPLoopDirective>(D), Action);
    } else {
      if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
        for (const Expr *E : LD->counters()) {
          const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
          if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) {
            LValue GlobLVal = CGF.EmitLValue(E);
            GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF));
          }
          if (isa<OMPCapturedExprDecl>(VD)) {
            // Emit only those that were not explicitly referenced in clauses.
            if (!CGF.LocalDeclMap.count(VD))
              CGF.EmitVarDecl(*VD);
          }
        }
        for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) {
          if (!C->getNumForLoops())
            continue;
          for (unsigned I = LD->getLoopsNumber(),
                        E = C->getLoopNumIterations().size();
               I < E; ++I) {
            if (const auto *VD = dyn_cast<OMPCapturedExprDecl>(
                    cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) {
              // Emit only those that were not explicitly referenced in
              // clauses.
              if (!CGF.LocalDeclMap.count(VD))
                CGF.EmitVarDecl(*VD);
            }
          }
        }
      }
      (void)GlobalsScope.Privatize();
      CGF.EmitStmt(D.getInnermostCapturedStmt()->getCapturedStmt());
    }
  };
  if (D.getDirectiveKind() == OMPD_atomic ||
      D.getDirectiveKind() == OMPD_critical ||
      D.getDirectiveKind() == OMPD_section ||
      D.getDirectiveKind() == OMPD_master ||
      D.getDirectiveKind() == OMPD_masked ||
      D.getDirectiveKind() == OMPD_unroll) {
    EmitStmt(D.getAssociatedStmt());
  } else {
    auto LPCRegion =
        CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, D);
    OMPSimdLexicalScope Scope(*this, D);
    CGM.getOpenMPRuntime().emitInlinedDirective(
        *this,
        isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd
                                                    : D.getDirectiveKind(),
        CodeGen);
  }
  // Check for outer lastprivate conditional update.
  checkForLastprivateConditionalUpdate(*this, D);
}
8192
