1 | //===--- CGStmtOpenMP.cpp - Emit LLVM Code from Statements ----------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit OpenMP nodes as LLVM code. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGCleanup.h" |
14 | #include "CGOpenMPRuntime.h" |
15 | #include "CodeGenFunction.h" |
16 | #include "CodeGenModule.h" |
17 | #include "TargetInfo.h" |
18 | #include "clang/AST/ASTContext.h" |
19 | #include "clang/AST/Attr.h" |
20 | #include "clang/AST/DeclOpenMP.h" |
21 | #include "clang/AST/OpenMPClause.h" |
22 | #include "clang/AST/Stmt.h" |
23 | #include "clang/AST/StmtOpenMP.h" |
24 | #include "clang/AST/StmtVisitor.h" |
25 | #include "clang/Basic/OpenMPKinds.h" |
26 | #include "clang/Basic/PrettyStackTrace.h" |
27 | #include "clang/Basic/SourceManager.h" |
28 | #include "llvm/ADT/SmallSet.h" |
29 | #include "llvm/BinaryFormat/Dwarf.h" |
30 | #include "llvm/Frontend/OpenMP/OMPConstants.h" |
31 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" |
32 | #include "llvm/IR/Constants.h" |
33 | #include "llvm/IR/DebugInfoMetadata.h" |
34 | #include "llvm/IR/Instructions.h" |
35 | #include "llvm/IR/IntrinsicInst.h" |
36 | #include "llvm/IR/Metadata.h" |
37 | #include "llvm/Support/AtomicOrdering.h" |
38 | #include "llvm/Support/Debug.h" |
39 | #include <optional> |
40 | using namespace clang; |
41 | using namespace CodeGen; |
42 | using namespace llvm::omp; |
43 | |
44 | #define TTL_CODEGEN_TYPE "target-teams-loop-codegen" |
45 | |
46 | static const VarDecl *getBaseDecl(const Expr *Ref); |
47 | |
48 | namespace { |
49 | /// Lexical scope for OpenMP executable constructs, that handles correct codegen |
50 | /// for captured expressions. |
51 | class OMPLexicalScope : public CodeGenFunction::LexicalScope { |
52 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
53 | for (const auto *C : S.clauses()) { |
54 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
55 | if (const auto *PreInit = |
56 | cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) { |
57 | for (const auto *I : PreInit->decls()) { |
58 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
59 | CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I)); |
60 | } else { |
61 | CodeGenFunction::AutoVarEmission Emission = |
62 | CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I)); |
63 | CGF.EmitAutoVarCleanups(emission: Emission); |
64 | } |
65 | } |
66 | } |
67 | } |
68 | } |
69 | } |
70 | CodeGenFunction::OMPPrivateScope InlinedShareds; |
71 | |
72 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
73 | return CGF.LambdaCaptureFields.lookup(VD) || |
74 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
75 | (CGF.CurCodeDecl && isa<BlockDecl>(Val: CGF.CurCodeDecl) && |
76 | cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD)); |
77 | } |
78 | |
79 | public: |
80 | OMPLexicalScope( |
81 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
82 | const std::optional<OpenMPDirectiveKind> CapturedRegion = std::nullopt, |
83 | const bool EmitPreInitStmt = true) |
84 | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
85 | InlinedShareds(CGF) { |
86 | if (EmitPreInitStmt) |
87 | emitPreInitStmt(CGF, S); |
88 | if (!CapturedRegion) |
89 | return; |
90 | assert(S.hasAssociatedStmt() && |
91 | "Expected associated statement for inlined directive." ); |
92 | const CapturedStmt *CS = S.getCapturedStmt(*CapturedRegion); |
93 | for (const auto &C : CS->captures()) { |
94 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
95 | auto *VD = C.getCapturedVar(); |
96 | assert(VD == VD->getCanonicalDecl() && |
97 | "Canonical decl must be captured." ); |
98 | DeclRefExpr DRE( |
99 | CGF.getContext(), const_cast<VarDecl *>(VD), |
100 | isCapturedVar(CGF, VD) || (CGF.CapturedStmtInfo && |
101 | InlinedShareds.isGlobalVarCaptured(VD)), |
102 | VD->getType().getNonReferenceType(), VK_LValue, C.getLocation()); |
103 | InlinedShareds.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF)); |
104 | } |
105 | } |
106 | (void)InlinedShareds.Privatize(); |
107 | } |
108 | }; |
109 | |
110 | /// Lexical scope for OpenMP parallel construct, that handles correct codegen |
111 | /// for captured expressions. |
112 | class OMPParallelScope final : public OMPLexicalScope { |
113 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
114 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
115 | return !(isOpenMPTargetExecutionDirective(Kind) || |
116 | isOpenMPLoopBoundSharingDirective(Kind)) && |
117 | isOpenMPParallelDirective(Kind); |
118 | } |
119 | |
120 | public: |
121 | OMPParallelScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
122 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
123 | EmitPreInitStmt(S)) {} |
124 | }; |
125 | |
126 | /// Lexical scope for OpenMP teams construct, that handles correct codegen |
127 | /// for captured expressions. |
128 | class OMPTeamsScope final : public OMPLexicalScope { |
129 | bool EmitPreInitStmt(const OMPExecutableDirective &S) { |
130 | OpenMPDirectiveKind Kind = S.getDirectiveKind(); |
131 | return !isOpenMPTargetExecutionDirective(Kind) && |
132 | isOpenMPTeamsDirective(Kind); |
133 | } |
134 | |
135 | public: |
136 | OMPTeamsScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
137 | : OMPLexicalScope(CGF, S, /*CapturedRegion=*/std::nullopt, |
138 | EmitPreInitStmt(S)) {} |
139 | }; |
140 | |
141 | /// Private scope for OpenMP loop-based directives, that supports capturing |
142 | /// of used expression from loop statement. |
143 | class OMPLoopScope : public CodeGenFunction::RunCleanupsScope { |
144 | void emitPreInitStmt(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) { |
145 | const DeclStmt *PreInits; |
146 | CodeGenFunction::OMPMapVars PreCondVars; |
147 | if (auto *LD = dyn_cast<OMPLoopDirective>(Val: &S)) { |
148 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
149 | for (const auto *E : LD->counters()) { |
150 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
151 | EmittedAsPrivate.insert(V: VD->getCanonicalDecl()); |
152 | (void)PreCondVars.setVarAddr( |
153 | CGF, LocalVD: VD, TempAddr: CGF.CreateMemTemp(VD->getType().getNonReferenceType())); |
154 | } |
155 | // Mark private vars as undefs. |
156 | for (const auto *C : LD->getClausesOfKind<OMPPrivateClause>()) { |
157 | for (const Expr *IRef : C->varlists()) { |
158 | const auto *OrigVD = |
159 | cast<VarDecl>(cast<DeclRefExpr>(IRef)->getDecl()); |
160 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
161 | QualType OrigVDTy = OrigVD->getType().getNonReferenceType(); |
162 | (void)PreCondVars.setVarAddr( |
163 | CGF, OrigVD, |
164 | Address(llvm::UndefValue::get(CGF.ConvertTypeForMem( |
165 | CGF.getContext().getPointerType(OrigVDTy))), |
166 | CGF.ConvertTypeForMem(OrigVDTy), |
167 | CGF.getContext().getDeclAlign(OrigVD))); |
168 | } |
169 | } |
170 | } |
171 | (void)PreCondVars.apply(CGF); |
172 | // Emit init, __range and __end variables for C++ range loops. |
173 | (void)OMPLoopBasedDirective::doForAllLoops( |
174 | LD->getInnermostCapturedStmt()->getCapturedStmt(), |
175 | /*TryImperfectlyNestedLoops=*/true, LD->getLoopsNumber(), |
176 | [&CGF](unsigned Cnt, const Stmt *CurStmt) { |
177 | if (const auto *CXXFor = dyn_cast<CXXForRangeStmt>(Val: CurStmt)) { |
178 | if (const Stmt *Init = CXXFor->getInit()) |
179 | CGF.EmitStmt(S: Init); |
180 | CGF.EmitStmt(S: CXXFor->getRangeStmt()); |
181 | CGF.EmitStmt(S: CXXFor->getEndStmt()); |
182 | } |
183 | return false; |
184 | }); |
185 | PreInits = cast_or_null<DeclStmt>(Val: LD->getPreInits()); |
186 | } else if (const auto *Tile = dyn_cast<OMPTileDirective>(Val: &S)) { |
187 | PreInits = cast_or_null<DeclStmt>(Val: Tile->getPreInits()); |
188 | } else if (const auto *Unroll = dyn_cast<OMPUnrollDirective>(Val: &S)) { |
189 | PreInits = cast_or_null<DeclStmt>(Val: Unroll->getPreInits()); |
190 | } else { |
191 | llvm_unreachable("Unknown loop-based directive kind." ); |
192 | } |
193 | if (PreInits) { |
194 | for (const auto *I : PreInits->decls()) |
195 | CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I)); |
196 | } |
197 | PreCondVars.restore(CGF); |
198 | } |
199 | |
200 | public: |
201 | OMPLoopScope(CodeGenFunction &CGF, const OMPLoopBasedDirective &S) |
202 | : CodeGenFunction::RunCleanupsScope(CGF) { |
203 | emitPreInitStmt(CGF, S); |
204 | } |
205 | }; |
206 | |
207 | class OMPSimdLexicalScope : public CodeGenFunction::LexicalScope { |
208 | CodeGenFunction::OMPPrivateScope InlinedShareds; |
209 | |
210 | static bool isCapturedVar(CodeGenFunction &CGF, const VarDecl *VD) { |
211 | return CGF.LambdaCaptureFields.lookup(VD) || |
212 | (CGF.CapturedStmtInfo && CGF.CapturedStmtInfo->lookup(VD)) || |
213 | (CGF.CurCodeDecl && isa<BlockDecl>(Val: CGF.CurCodeDecl) && |
214 | cast<BlockDecl>(Val: CGF.CurCodeDecl)->capturesVariable(var: VD)); |
215 | } |
216 | |
217 | public: |
218 | OMPSimdLexicalScope(CodeGenFunction &CGF, const OMPExecutableDirective &S) |
219 | : CodeGenFunction::LexicalScope(CGF, S.getSourceRange()), |
220 | InlinedShareds(CGF) { |
221 | for (const auto *C : S.clauses()) { |
222 | if (const auto *CPI = OMPClauseWithPreInit::get(C)) { |
223 | if (const auto *PreInit = |
224 | cast_or_null<DeclStmt>(Val: CPI->getPreInitStmt())) { |
225 | for (const auto *I : PreInit->decls()) { |
226 | if (!I->hasAttr<OMPCaptureNoInitAttr>()) { |
227 | CGF.EmitVarDecl(D: cast<VarDecl>(Val: *I)); |
228 | } else { |
229 | CodeGenFunction::AutoVarEmission Emission = |
230 | CGF.EmitAutoVarAlloca(var: cast<VarDecl>(Val: *I)); |
231 | CGF.EmitAutoVarCleanups(emission: Emission); |
232 | } |
233 | } |
234 | } |
235 | } else if (const auto *UDP = dyn_cast<OMPUseDevicePtrClause>(Val: C)) { |
236 | for (const Expr *E : UDP->varlists()) { |
237 | const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
238 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
239 | CGF.EmitVarDecl(*OED); |
240 | } |
241 | } else if (const auto *UDP = dyn_cast<OMPUseDeviceAddrClause>(Val: C)) { |
242 | for (const Expr *E : UDP->varlists()) { |
243 | const Decl *D = getBaseDecl(E); |
244 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
245 | CGF.EmitVarDecl(*OED); |
246 | } |
247 | } |
248 | } |
249 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) |
250 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: InlinedShareds); |
251 | if (const auto *TG = dyn_cast<OMPTaskgroupDirective>(Val: &S)) { |
252 | if (const Expr *E = TG->getReductionRef()) |
253 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl())); |
254 | } |
255 | // Temp copy arrays for inscan reductions should not be emitted as they are |
256 | // not used in simd only mode. |
257 | llvm::DenseSet<CanonicalDeclPtr<const Decl>> CopyArrayTemps; |
258 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
259 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
260 | continue; |
261 | for (const Expr *E : C->copy_array_temps()) |
262 | CopyArrayTemps.insert(cast<DeclRefExpr>(Val: E)->getDecl()); |
263 | } |
264 | const auto *CS = cast_or_null<CapturedStmt>(Val: S.getAssociatedStmt()); |
265 | while (CS) { |
266 | for (auto &C : CS->captures()) { |
267 | if (C.capturesVariable() || C.capturesVariableByCopy()) { |
268 | auto *VD = C.getCapturedVar(); |
269 | if (CopyArrayTemps.contains(VD)) |
270 | continue; |
271 | assert(VD == VD->getCanonicalDecl() && |
272 | "Canonical decl must be captured." ); |
273 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD), |
274 | isCapturedVar(CGF, VD) || |
275 | (CGF.CapturedStmtInfo && |
276 | InlinedShareds.isGlobalVarCaptured(VD)), |
277 | VD->getType().getNonReferenceType(), VK_LValue, |
278 | C.getLocation()); |
279 | InlinedShareds.addPrivate(LocalVD: VD, Addr: CGF.EmitLValue(&DRE).getAddress(CGF)); |
280 | } |
281 | } |
282 | CS = dyn_cast<CapturedStmt>(Val: CS->getCapturedStmt()); |
283 | } |
284 | (void)InlinedShareds.Privatize(); |
285 | } |
286 | }; |
287 | |
288 | } // namespace |
289 | |
290 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
291 | const OMPExecutableDirective &S, |
292 | const RegionCodeGenTy &CodeGen); |
293 | |
294 | LValue CodeGenFunction::EmitOMPSharedLValue(const Expr *E) { |
295 | if (const auto *OrigDRE = dyn_cast<DeclRefExpr>(Val: E)) { |
296 | if (const auto *OrigVD = dyn_cast<VarDecl>(Val: OrigDRE->getDecl())) { |
297 | OrigVD = OrigVD->getCanonicalDecl(); |
298 | bool IsCaptured = |
299 | LambdaCaptureFields.lookup(OrigVD) || |
300 | (CapturedStmtInfo && CapturedStmtInfo->lookup(VD: OrigVD)) || |
301 | (CurCodeDecl && isa<BlockDecl>(Val: CurCodeDecl)); |
302 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), IsCaptured, |
303 | OrigDRE->getType(), VK_LValue, OrigDRE->getExprLoc()); |
304 | return EmitLValue(&DRE); |
305 | } |
306 | } |
307 | return EmitLValue(E); |
308 | } |
309 | |
310 | llvm::Value *CodeGenFunction::getTypeSize(QualType Ty) { |
311 | ASTContext &C = getContext(); |
312 | llvm::Value *Size = nullptr; |
313 | auto SizeInChars = C.getTypeSizeInChars(T: Ty); |
314 | if (SizeInChars.isZero()) { |
315 | // getTypeSizeInChars() returns 0 for a VLA. |
316 | while (const VariableArrayType *VAT = C.getAsVariableArrayType(T: Ty)) { |
317 | VlaSizePair VlaSize = getVLASize(vla: VAT); |
318 | Ty = VlaSize.Type; |
319 | Size = |
320 | Size ? Builder.CreateNUWMul(LHS: Size, RHS: VlaSize.NumElts) : VlaSize.NumElts; |
321 | } |
322 | SizeInChars = C.getTypeSizeInChars(T: Ty); |
323 | if (SizeInChars.isZero()) |
324 | return llvm::ConstantInt::get(Ty: SizeTy, /*V=*/0); |
325 | return Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: SizeInChars)); |
326 | } |
327 | return CGM.getSize(numChars: SizeInChars); |
328 | } |
329 | |
330 | void CodeGenFunction::GenerateOpenMPCapturedVars( |
331 | const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars) { |
332 | const RecordDecl *RD = S.getCapturedRecordDecl(); |
333 | auto CurField = RD->field_begin(); |
334 | auto CurCap = S.captures().begin(); |
335 | for (CapturedStmt::const_capture_init_iterator I = S.capture_init_begin(), |
336 | E = S.capture_init_end(); |
337 | I != E; ++I, ++CurField, ++CurCap) { |
338 | if (CurField->hasCapturedVLAType()) { |
339 | const VariableArrayType *VAT = CurField->getCapturedVLAType(); |
340 | llvm::Value *Val = VLASizeMap[VAT->getSizeExpr()]; |
341 | CapturedVars.push_back(Elt: Val); |
342 | } else if (CurCap->capturesThis()) { |
343 | CapturedVars.push_back(Elt: CXXThisValue); |
344 | } else if (CurCap->capturesVariableByCopy()) { |
345 | llvm::Value *CV = EmitLoadOfScalar(lvalue: EmitLValue(E: *I), Loc: CurCap->getLocation()); |
346 | |
347 | // If the field is not a pointer, we need to save the actual value |
348 | // and load it as a void pointer. |
349 | if (!CurField->getType()->isAnyPointerType()) { |
350 | ASTContext &Ctx = getContext(); |
351 | Address DstAddr = CreateMemTemp( |
352 | T: Ctx.getUIntPtrType(), |
353 | Name: Twine(CurCap->getCapturedVar()->getName(), ".casted" )); |
354 | LValue DstLV = MakeAddrLValue(Addr: DstAddr, T: Ctx.getUIntPtrType()); |
355 | |
356 | llvm::Value *SrcAddrVal = EmitScalarConversion( |
357 | Src: DstAddr.emitRawPointer(CGF&: *this), |
358 | SrcTy: Ctx.getPointerType(T: Ctx.getUIntPtrType()), |
359 | DstTy: Ctx.getPointerType(CurField->getType()), Loc: CurCap->getLocation()); |
360 | LValue SrcLV = |
361 | MakeNaturalAlignAddrLValue(V: SrcAddrVal, T: CurField->getType()); |
362 | |
363 | // Store the value using the source type pointer. |
364 | EmitStoreThroughLValue(Src: RValue::get(V: CV), Dst: SrcLV); |
365 | |
366 | // Load the value using the destination type pointer. |
367 | CV = EmitLoadOfScalar(lvalue: DstLV, Loc: CurCap->getLocation()); |
368 | } |
369 | CapturedVars.push_back(Elt: CV); |
370 | } else { |
371 | assert(CurCap->capturesVariable() && "Expected capture by reference." ); |
372 | CapturedVars.push_back( |
373 | Elt: EmitLValue(E: *I).getAddress(CGF&: *this).emitRawPointer(CGF&: *this)); |
374 | } |
375 | } |
376 | } |
377 | |
378 | static Address castValueFromUintptr(CodeGenFunction &CGF, SourceLocation Loc, |
379 | QualType DstType, StringRef Name, |
380 | LValue AddrLV) { |
381 | ASTContext &Ctx = CGF.getContext(); |
382 | |
383 | llvm::Value *CastedPtr = CGF.EmitScalarConversion( |
384 | Src: AddrLV.getAddress(CGF).emitRawPointer(CGF), SrcTy: Ctx.getUIntPtrType(), |
385 | DstTy: Ctx.getPointerType(T: DstType), Loc); |
386 | // FIXME: should the pointee type (DstType) be passed? |
387 | Address TmpAddr = |
388 | CGF.MakeNaturalAlignAddrLValue(V: CastedPtr, T: DstType).getAddress(CGF); |
389 | return TmpAddr; |
390 | } |
391 | |
392 | static QualType getCanonicalParamType(ASTContext &C, QualType T) { |
393 | if (T->isLValueReferenceType()) |
394 | return C.getLValueReferenceType( |
395 | T: getCanonicalParamType(C, T: T.getNonReferenceType()), |
396 | /*SpelledAsLValue=*/false); |
397 | if (T->isPointerType()) |
398 | return C.getPointerType(T: getCanonicalParamType(C, T: T->getPointeeType())); |
399 | if (const ArrayType *A = T->getAsArrayTypeUnsafe()) { |
400 | if (const auto *VLA = dyn_cast<VariableArrayType>(A)) |
401 | return getCanonicalParamType(C, VLA->getElementType()); |
402 | if (!A->isVariablyModifiedType()) |
403 | return C.getCanonicalType(T); |
404 | } |
405 | return C.getCanonicalParamType(T); |
406 | } |
407 | |
408 | namespace { |
409 | /// Contains required data for proper outlined function codegen. |
410 | struct FunctionOptions { |
411 | /// Captured statement for which the function is generated. |
412 | const CapturedStmt *S = nullptr; |
413 | /// true if cast to/from UIntPtr is required for variables captured by |
414 | /// value. |
415 | const bool UIntPtrCastRequired = true; |
416 | /// true if only casted arguments must be registered as local args or VLA |
417 | /// sizes. |
418 | const bool RegisterCastedArgsOnly = false; |
419 | /// Name of the generated function. |
420 | const StringRef FunctionName; |
421 | /// Location of the non-debug version of the outlined function. |
422 | SourceLocation Loc; |
423 | explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, |
424 | bool RegisterCastedArgsOnly, StringRef FunctionName, |
425 | SourceLocation Loc) |
426 | : S(S), UIntPtrCastRequired(UIntPtrCastRequired), |
427 | RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), |
428 | FunctionName(FunctionName), Loc(Loc) {} |
429 | }; |
430 | } // namespace |
431 | |
432 | static llvm::Function *emitOutlinedFunctionPrologue( |
433 | CodeGenFunction &CGF, FunctionArgList &Args, |
434 | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> |
435 | &LocalAddrs, |
436 | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> |
437 | &VLASizes, |
438 | llvm::Value *&CXXThisValue, const FunctionOptions &FO) { |
439 | const CapturedDecl *CD = FO.S->getCapturedDecl(); |
440 | const RecordDecl *RD = FO.S->getCapturedRecordDecl(); |
441 | assert(CD->hasBody() && "missing CapturedDecl body" ); |
442 | |
443 | CXXThisValue = nullptr; |
444 | // Build the argument list. |
445 | CodeGenModule &CGM = CGF.CGM; |
446 | ASTContext &Ctx = CGM.getContext(); |
447 | FunctionArgList TargetArgs; |
448 | Args.append(in_start: CD->param_begin(), |
449 | in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition())); |
450 | TargetArgs.append( |
451 | in_start: CD->param_begin(), |
452 | in_end: std::next(x: CD->param_begin(), n: CD->getContextParamPosition())); |
453 | auto I = FO.S->captures().begin(); |
454 | FunctionDecl *DebugFunctionDecl = nullptr; |
455 | if (!FO.UIntPtrCastRequired) { |
456 | FunctionProtoType::ExtProtoInfo EPI; |
457 | QualType FunctionTy = Ctx.getFunctionType(ResultTy: Ctx.VoidTy, Args: std::nullopt, EPI); |
458 | DebugFunctionDecl = FunctionDecl::Create( |
459 | Ctx, Ctx.getTranslationUnitDecl(), FO.S->getBeginLoc(), |
460 | SourceLocation(), DeclarationName(), FunctionTy, |
461 | Ctx.getTrivialTypeSourceInfo(T: FunctionTy), SC_Static, |
462 | /*UsesFPIntrin=*/false, /*isInlineSpecified=*/false, |
463 | /*hasWrittenPrototype=*/false); |
464 | } |
465 | for (const FieldDecl *FD : RD->fields()) { |
466 | QualType ArgType = FD->getType(); |
467 | IdentifierInfo *II = nullptr; |
468 | VarDecl *CapVar = nullptr; |
469 | |
470 | // If this is a capture by copy and the type is not a pointer, the outlined |
471 | // function argument type should be uintptr and the value properly casted to |
472 | // uintptr. This is necessary given that the runtime library is only able to |
473 | // deal with pointers. We can pass in the same way the VLA type sizes to the |
474 | // outlined function. |
475 | if (FO.UIntPtrCastRequired && |
476 | ((I->capturesVariableByCopy() && !ArgType->isAnyPointerType()) || |
477 | I->capturesVariableArrayType())) |
478 | ArgType = Ctx.getUIntPtrType(); |
479 | |
480 | if (I->capturesVariable() || I->capturesVariableByCopy()) { |
481 | CapVar = I->getCapturedVar(); |
482 | II = CapVar->getIdentifier(); |
483 | } else if (I->capturesThis()) { |
484 | II = &Ctx.Idents.get(Name: "this" ); |
485 | } else { |
486 | assert(I->capturesVariableArrayType()); |
487 | II = &Ctx.Idents.get(Name: "vla" ); |
488 | } |
489 | if (ArgType->isVariablyModifiedType()) |
490 | ArgType = getCanonicalParamType(C&: Ctx, T: ArgType); |
491 | VarDecl *Arg; |
492 | if (CapVar && (CapVar->getTLSKind() != clang::VarDecl::TLS_None)) { |
493 | Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), |
494 | II, ArgType, |
495 | ImplicitParamKind::ThreadPrivateVar); |
496 | } else if (DebugFunctionDecl && (CapVar || I->capturesThis())) { |
497 | Arg = ParmVarDecl::Create( |
498 | C&: Ctx, DC: DebugFunctionDecl, |
499 | StartLoc: CapVar ? CapVar->getBeginLoc() : FD->getBeginLoc(), |
500 | IdLoc: CapVar ? CapVar->getLocation() : FD->getLocation(), Id: II, T: ArgType, |
501 | /*TInfo=*/nullptr, S: SC_None, /*DefArg=*/nullptr); |
502 | } else { |
503 | Arg = ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), |
504 | II, ArgType, ImplicitParamKind::Other); |
505 | } |
506 | Args.emplace_back(Args&: Arg); |
507 | // Do not cast arguments if we emit function with non-original types. |
508 | TargetArgs.emplace_back( |
509 | Args: FO.UIntPtrCastRequired |
510 | ? Arg |
511 | : CGM.getOpenMPRuntime().translateParameter(FD, NativeParam: Arg)); |
512 | ++I; |
513 | } |
514 | Args.append(in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1), |
515 | in_end: CD->param_end()); |
516 | TargetArgs.append( |
517 | in_start: std::next(x: CD->param_begin(), n: CD->getContextParamPosition() + 1), |
518 | in_end: CD->param_end()); |
519 | |
520 | // Create the function declaration. |
521 | const CGFunctionInfo &FuncInfo = |
522 | CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); |
523 | llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(Info: FuncInfo); |
524 | |
525 | auto *F = |
526 | llvm::Function::Create(Ty: FuncLLVMTy, Linkage: llvm::GlobalValue::InternalLinkage, |
527 | N: FO.FunctionName, M: &CGM.getModule()); |
528 | CGM.SetInternalFunctionAttributes(GD: CD, F: F, FI: FuncInfo); |
529 | if (CD->isNothrow()) |
530 | F->setDoesNotThrow(); |
531 | F->setDoesNotRecurse(); |
532 | |
533 | // Always inline the outlined function if optimizations are enabled. |
534 | if (CGM.getCodeGenOpts().OptimizationLevel != 0) { |
535 | F->removeFnAttr(llvm::Attribute::NoInline); |
536 | F->addFnAttr(llvm::Attribute::AlwaysInline); |
537 | } |
538 | |
539 | // Generate the function. |
540 | CGF.StartFunction(GD: CD, RetTy: Ctx.VoidTy, Fn: F, FnInfo: FuncInfo, Args: TargetArgs, |
541 | Loc: FO.UIntPtrCastRequired ? FO.Loc : FO.S->getBeginLoc(), |
542 | StartLoc: FO.UIntPtrCastRequired ? FO.Loc |
543 | : CD->getBody()->getBeginLoc()); |
544 | unsigned Cnt = CD->getContextParamPosition(); |
545 | I = FO.S->captures().begin(); |
546 | for (const FieldDecl *FD : RD->fields()) { |
547 | // Do not map arguments if we emit function with non-original types. |
548 | Address LocalAddr(Address::invalid()); |
549 | if (!FO.UIntPtrCastRequired && Args[Cnt] != TargetArgs[Cnt]) { |
550 | LocalAddr = CGM.getOpenMPRuntime().getParameterAddress(CGF, NativeParam: Args[Cnt], |
551 | TargetParam: TargetArgs[Cnt]); |
552 | } else { |
553 | LocalAddr = CGF.GetAddrOfLocalVar(VD: Args[Cnt]); |
554 | } |
555 | // If we are capturing a pointer by copy we don't need to do anything, just |
556 | // use the value that we get from the arguments. |
557 | if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { |
558 | const VarDecl *CurVD = I->getCapturedVar(); |
559 | if (!FO.RegisterCastedArgsOnly) |
560 | LocalAddrs.insert({Args[Cnt], {CurVD, LocalAddr}}); |
561 | ++Cnt; |
562 | ++I; |
563 | continue; |
564 | } |
565 | |
566 | LValue ArgLVal = CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), |
567 | AlignmentSource::Decl); |
568 | if (FD->hasCapturedVLAType()) { |
569 | if (FO.UIntPtrCastRequired) { |
570 | ArgLVal = CGF.MakeAddrLValue( |
571 | castValueFromUintptr(CGF, I->getLocation(), FD->getType(), |
572 | Args[Cnt]->getName(), ArgLVal), |
573 | FD->getType(), AlignmentSource::Decl); |
574 | } |
575 | llvm::Value *ExprArg = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation()); |
576 | const VariableArrayType *VAT = FD->getCapturedVLAType(); |
577 | VLASizes.try_emplace(Args[Cnt], VAT->getSizeExpr(), ExprArg); |
578 | } else if (I->capturesVariable()) { |
579 | const VarDecl *Var = I->getCapturedVar(); |
580 | QualType VarTy = Var->getType(); |
581 | Address ArgAddr = ArgLVal.getAddress(CGF); |
582 | if (ArgLVal.getType()->isLValueReferenceType()) { |
583 | ArgAddr = CGF.EmitLoadOfReference(RefLVal: ArgLVal); |
584 | } else if (!VarTy->isVariablyModifiedType() || !VarTy->isPointerType()) { |
585 | assert(ArgLVal.getType()->isPointerType()); |
586 | ArgAddr = CGF.EmitLoadOfPointer( |
587 | Ptr: ArgAddr, PtrTy: ArgLVal.getType()->castAs<PointerType>()); |
588 | } |
589 | if (!FO.RegisterCastedArgsOnly) { |
590 | LocalAddrs.insert( |
591 | {Args[Cnt], {Var, ArgAddr.withAlignment(NewAlignment: Ctx.getDeclAlign(Var))}}); |
592 | } |
593 | } else if (I->capturesVariableByCopy()) { |
594 | assert(!FD->getType()->isAnyPointerType() && |
595 | "Not expecting a captured pointer." ); |
596 | const VarDecl *Var = I->getCapturedVar(); |
597 | LocalAddrs.insert({Args[Cnt], |
598 | {Var, FO.UIntPtrCastRequired |
599 | ? castValueFromUintptr( |
600 | CGF, I->getLocation(), FD->getType(), |
601 | Args[Cnt]->getName(), ArgLVal) |
602 | : ArgLVal.getAddress(CGF)}}); |
603 | } else { |
604 | // If 'this' is captured, load it into CXXThisValue. |
605 | assert(I->capturesThis()); |
606 | CXXThisValue = CGF.EmitLoadOfScalar(lvalue: ArgLVal, Loc: I->getLocation()); |
607 | LocalAddrs.insert({Args[Cnt], {nullptr, ArgLVal.getAddress(CGF)}}); |
608 | } |
609 | ++Cnt; |
610 | ++I; |
611 | } |
612 | |
613 | return F; |
614 | } |
615 | |
616 | llvm::Function * |
617 | CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S, |
618 | SourceLocation Loc) { |
619 | assert( |
620 | CapturedStmtInfo && |
621 | "CapturedStmtInfo should be set when generating the captured function" ); |
622 | const CapturedDecl *CD = S.getCapturedDecl(); |
623 | // Build the argument list. |
624 | bool NeedWrapperFunction = |
625 | getDebugInfo() && CGM.getCodeGenOpts().hasReducedDebugInfo(); |
626 | FunctionArgList Args; |
627 | llvm::MapVector<const Decl *, std::pair<const VarDecl *, Address>> LocalAddrs; |
628 | llvm::DenseMap<const Decl *, std::pair<const Expr *, llvm::Value *>> VLASizes; |
629 | SmallString<256> Buffer; |
630 | llvm::raw_svector_ostream Out(Buffer); |
631 | Out << CapturedStmtInfo->getHelperName(); |
632 | if (NeedWrapperFunction) |
633 | Out << "_debug__" ; |
634 | FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, |
635 | Out.str(), Loc); |
636 | llvm::Function *F = emitOutlinedFunctionPrologue(CGF&: *this, Args, LocalAddrs, |
637 | VLASizes, CXXThisValue, FO); |
638 | CodeGenFunction::OMPPrivateScope LocalScope(*this); |
639 | for (const auto &LocalAddrPair : LocalAddrs) { |
640 | if (LocalAddrPair.second.first) { |
641 | LocalScope.addPrivate(LocalVD: LocalAddrPair.second.first, |
642 | Addr: LocalAddrPair.second.second); |
643 | } |
644 | } |
645 | (void)LocalScope.Privatize(); |
646 | for (const auto &VLASizePair : VLASizes) |
647 | VLASizeMap[VLASizePair.second.first] = VLASizePair.second.second; |
648 | PGO.assignRegionCounters(GD: GlobalDecl(CD), Fn: F); |
649 | CapturedStmtInfo->EmitBody(CGF&: *this, S: CD->getBody()); |
650 | (void)LocalScope.ForceCleanup(); |
651 | FinishFunction(EndLoc: CD->getBodyRBrace()); |
652 | if (!NeedWrapperFunction) |
653 | return F; |
654 | |
655 | FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, |
656 | /*RegisterCastedArgsOnly=*/true, |
657 | CapturedStmtInfo->getHelperName(), Loc); |
658 | CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); |
659 | WrapperCGF.CapturedStmtInfo = CapturedStmtInfo; |
660 | Args.clear(); |
661 | LocalAddrs.clear(); |
662 | VLASizes.clear(); |
663 | llvm::Function *WrapperF = |
664 | emitOutlinedFunctionPrologue(CGF&: WrapperCGF, Args, LocalAddrs, VLASizes, |
665 | CXXThisValue&: WrapperCGF.CXXThisValue, FO: WrapperFO); |
666 | llvm::SmallVector<llvm::Value *, 4> CallArgs; |
667 | auto *PI = F->arg_begin(); |
668 | for (const auto *Arg : Args) { |
669 | llvm::Value *CallArg; |
670 | auto I = LocalAddrs.find(Arg); |
671 | if (I != LocalAddrs.end()) { |
672 | LValue LV = WrapperCGF.MakeAddrLValue( |
673 | I->second.second, |
674 | I->second.first ? I->second.first->getType() : Arg->getType(), |
675 | AlignmentSource::Decl); |
676 | if (LV.getType()->isAnyComplexType()) |
677 | LV.setAddress(LV.getAddress(CGF&: WrapperCGF).withElementType(ElemTy: PI->getType())); |
678 | CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc()); |
679 | } else { |
680 | auto EI = VLASizes.find(Arg); |
681 | if (EI != VLASizes.end()) { |
682 | CallArg = EI->second.second; |
683 | } else { |
684 | LValue LV = |
685 | WrapperCGF.MakeAddrLValue(WrapperCGF.GetAddrOfLocalVar(VD: Arg), |
686 | Arg->getType(), AlignmentSource::Decl); |
687 | CallArg = WrapperCGF.EmitLoadOfScalar(lvalue: LV, Loc: S.getBeginLoc()); |
688 | } |
689 | } |
690 | CallArgs.emplace_back(WrapperCGF.EmitFromMemory(Value: CallArg, Ty: Arg->getType())); |
691 | ++PI; |
692 | } |
693 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF&: WrapperCGF, Loc, OutlinedFn: F, Args: CallArgs); |
694 | WrapperCGF.FinishFunction(); |
695 | return WrapperF; |
696 | } |
697 | |
698 | //===----------------------------------------------------------------------===// |
699 | // OpenMP Directive Emission |
700 | //===----------------------------------------------------------------------===// |
701 | void CodeGenFunction::EmitOMPAggregateAssign( |
702 | Address DestAddr, Address SrcAddr, QualType OriginalType, |
703 | const llvm::function_ref<void(Address, Address)> CopyGen) { |
704 | // Perform element-by-element initialization. |
705 | QualType ElementTy; |
706 | |
707 | // Drill down to the base element type on both arrays. |
708 | const ArrayType *ArrayTy = OriginalType->getAsArrayTypeUnsafe(); |
709 | llvm::Value *NumElements = emitArrayLength(arrayType: ArrayTy, baseType&: ElementTy, addr&: DestAddr); |
710 | SrcAddr = SrcAddr.withElementType(ElemTy: DestAddr.getElementType()); |
711 | |
712 | llvm::Value *SrcBegin = SrcAddr.emitRawPointer(CGF&: *this); |
713 | llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF&: *this); |
714 | // Cast from pointer to array type to pointer to single element. |
715 | llvm::Value *DestEnd = Builder.CreateInBoundsGEP(Ty: DestAddr.getElementType(), |
716 | Ptr: DestBegin, IdxList: NumElements); |
717 | |
718 | // The basic structure here is a while-do loop. |
719 | llvm::BasicBlock *BodyBB = createBasicBlock(name: "omp.arraycpy.body" ); |
720 | llvm::BasicBlock *DoneBB = createBasicBlock(name: "omp.arraycpy.done" ); |
721 | llvm::Value *IsEmpty = |
722 | Builder.CreateICmpEQ(LHS: DestBegin, RHS: DestEnd, Name: "omp.arraycpy.isempty" ); |
723 | Builder.CreateCondBr(Cond: IsEmpty, True: DoneBB, False: BodyBB); |
724 | |
725 | // Enter the loop body, making that address the current address. |
726 | llvm::BasicBlock *EntryBB = Builder.GetInsertBlock(); |
727 | EmitBlock(BB: BodyBB); |
728 | |
729 | CharUnits ElementSize = getContext().getTypeSizeInChars(T: ElementTy); |
730 | |
731 | llvm::PHINode *SrcElementPHI = |
732 | Builder.CreatePHI(Ty: SrcBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.srcElementPast" ); |
733 | SrcElementPHI->addIncoming(V: SrcBegin, BB: EntryBB); |
734 | Address SrcElementCurrent = |
735 | Address(SrcElementPHI, SrcAddr.getElementType(), |
736 | SrcAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize)); |
737 | |
738 | llvm::PHINode *DestElementPHI = Builder.CreatePHI( |
739 | Ty: DestBegin->getType(), NumReservedValues: 2, Name: "omp.arraycpy.destElementPast" ); |
740 | DestElementPHI->addIncoming(V: DestBegin, BB: EntryBB); |
741 | Address DestElementCurrent = |
742 | Address(DestElementPHI, DestAddr.getElementType(), |
743 | DestAddr.getAlignment().alignmentOfArrayElement(elementSize: ElementSize)); |
744 | |
745 | // Emit copy. |
746 | CopyGen(DestElementCurrent, SrcElementCurrent); |
747 | |
748 | // Shift the address forward by one element. |
749 | llvm::Value *DestElementNext = |
750 | Builder.CreateConstGEP1_32(Ty: DestAddr.getElementType(), Ptr: DestElementPHI, |
751 | /*Idx0=*/1, Name: "omp.arraycpy.dest.element" ); |
752 | llvm::Value *SrcElementNext = |
753 | Builder.CreateConstGEP1_32(Ty: SrcAddr.getElementType(), Ptr: SrcElementPHI, |
754 | /*Idx0=*/1, Name: "omp.arraycpy.src.element" ); |
755 | // Check whether we've reached the end. |
756 | llvm::Value *Done = |
757 | Builder.CreateICmpEQ(LHS: DestElementNext, RHS: DestEnd, Name: "omp.arraycpy.done" ); |
758 | Builder.CreateCondBr(Cond: Done, True: DoneBB, False: BodyBB); |
759 | DestElementPHI->addIncoming(V: DestElementNext, BB: Builder.GetInsertBlock()); |
760 | SrcElementPHI->addIncoming(V: SrcElementNext, BB: Builder.GetInsertBlock()); |
761 | |
762 | // Done. |
763 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
764 | } |
765 | |
766 | void CodeGenFunction::EmitOMPCopy(QualType OriginalType, Address DestAddr, |
767 | Address SrcAddr, const VarDecl *DestVD, |
768 | const VarDecl *SrcVD, const Expr *Copy) { |
769 | if (OriginalType->isArrayType()) { |
770 | const auto *BO = dyn_cast<BinaryOperator>(Val: Copy); |
771 | if (BO && BO->getOpcode() == BO_Assign) { |
772 | // Perform simple memcpy for simple copying. |
773 | LValue Dest = MakeAddrLValue(Addr: DestAddr, T: OriginalType); |
774 | LValue Src = MakeAddrLValue(Addr: SrcAddr, T: OriginalType); |
775 | EmitAggregateAssign(Dest, Src, EltTy: OriginalType); |
776 | } else { |
777 | // For arrays with complex element types perform element by element |
778 | // copying. |
779 | EmitOMPAggregateAssign( |
780 | DestAddr, SrcAddr, OriginalType, |
781 | CopyGen: [this, Copy, SrcVD, DestVD](Address DestElement, Address SrcElement) { |
782 | // Working with the single array element, so have to remap |
783 | // destination and source variables to corresponding array |
784 | // elements. |
785 | CodeGenFunction::OMPPrivateScope Remap(*this); |
786 | Remap.addPrivate(LocalVD: DestVD, Addr: DestElement); |
787 | Remap.addPrivate(LocalVD: SrcVD, Addr: SrcElement); |
788 | (void)Remap.Privatize(); |
789 | EmitIgnoredExpr(E: Copy); |
790 | }); |
791 | } |
792 | } else { |
793 | // Remap pseudo source variable to private copy. |
794 | CodeGenFunction::OMPPrivateScope Remap(*this); |
795 | Remap.addPrivate(LocalVD: SrcVD, Addr: SrcAddr); |
796 | Remap.addPrivate(LocalVD: DestVD, Addr: DestAddr); |
797 | (void)Remap.Privatize(); |
798 | // Emit copying of the whole variable. |
799 | EmitIgnoredExpr(E: Copy); |
800 | } |
801 | } |
802 | |
803 | bool CodeGenFunction::EmitOMPFirstprivateClause(const OMPExecutableDirective &D, |
804 | OMPPrivateScope &PrivateScope) { |
805 | if (!HaveInsertPoint()) |
806 | return false; |
807 | bool DeviceConstTarget = |
808 | getLangOpts().OpenMPIsTargetDevice && |
809 | isOpenMPTargetExecutionDirective(D.getDirectiveKind()); |
810 | bool FirstprivateIsLastprivate = false; |
811 | llvm::DenseMap<const VarDecl *, OpenMPLastprivateModifier> Lastprivates; |
812 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
813 | for (const auto *D : C->varlists()) |
814 | Lastprivates.try_emplace( |
815 | cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl())->getCanonicalDecl(), |
816 | C->getKind()); |
817 | } |
818 | llvm::DenseSet<const VarDecl *> EmittedAsFirstprivate; |
819 | llvm::SmallVector<OpenMPDirectiveKind, 4> CaptureRegions; |
820 | getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind()); |
821 | // Force emission of the firstprivate copy if the directive does not emit |
822 | // outlined function, like omp for, omp simd, omp distribute etc. |
823 | bool MustEmitFirstprivateCopy = |
824 | CaptureRegions.size() == 1 && CaptureRegions.back() == OMPD_unknown; |
825 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
826 | const auto *IRef = C->varlist_begin(); |
827 | const auto *InitsRef = C->inits().begin(); |
828 | for (const Expr *IInit : C->private_copies()) { |
829 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
830 | bool ThisFirstprivateIsLastprivate = |
831 | Lastprivates.count(Val: OrigVD->getCanonicalDecl()) > 0; |
832 | const FieldDecl *FD = CapturedStmtInfo->lookup(VD: OrigVD); |
833 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
834 | if (!MustEmitFirstprivateCopy && !ThisFirstprivateIsLastprivate && FD && |
835 | !FD->getType()->isReferenceType() && |
836 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
837 | EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
838 | ++IRef; |
839 | ++InitsRef; |
840 | continue; |
841 | } |
842 | // Do not emit copy for firstprivate constant variables in target regions, |
843 | // captured by reference. |
844 | if (DeviceConstTarget && OrigVD->getType().isConstant(getContext()) && |
845 | FD && FD->getType()->isReferenceType() && |
846 | (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())) { |
847 | EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()); |
848 | ++IRef; |
849 | ++InitsRef; |
850 | continue; |
851 | } |
852 | FirstprivateIsLastprivate = |
853 | FirstprivateIsLastprivate || ThisFirstprivateIsLastprivate; |
854 | if (EmittedAsFirstprivate.insert(OrigVD->getCanonicalDecl()).second) { |
855 | const auto *VDInit = |
856 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *InitsRef)->getDecl()); |
857 | bool IsRegistered; |
858 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
859 | /*RefersToEnclosingVariableOrCapture=*/FD != nullptr, |
860 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
861 | LValue OriginalLVal; |
862 | if (!FD) { |
863 | // Check if the firstprivate variable is just a constant value. |
864 | ConstantEmission CE = tryEmitAsConstant(refExpr: &DRE); |
865 | if (CE && !CE.isReference()) { |
866 | // Constant value, no need to create a copy. |
867 | ++IRef; |
868 | ++InitsRef; |
869 | continue; |
870 | } |
871 | if (CE && CE.isReference()) { |
872 | OriginalLVal = CE.getReferenceLValue(*this, &DRE); |
873 | } else { |
874 | assert(!CE && "Expected non-constant firstprivate." ); |
875 | OriginalLVal = EmitLValue(&DRE); |
876 | } |
877 | } else { |
878 | OriginalLVal = EmitLValue(&DRE); |
879 | } |
880 | QualType Type = VD->getType(); |
881 | if (Type->isArrayType()) { |
882 | // Emit VarDecl with copy init for arrays. |
883 | // Get the address of the original variable captured in current |
884 | // captured region. |
885 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *VD); |
886 | const Expr *Init = VD->getInit(); |
887 | if (!isa<CXXConstructExpr>(Val: Init) || isTrivialInitializer(Init)) { |
888 | // Perform simple memcpy. |
889 | LValue Dest = MakeAddrLValue(Addr: Emission.getAllocatedAddress(), T: Type); |
890 | EmitAggregateAssign(Dest, Src: OriginalLVal, EltTy: Type); |
891 | } else { |
892 | EmitOMPAggregateAssign( |
893 | DestAddr: Emission.getAllocatedAddress(), SrcAddr: OriginalLVal.getAddress(CGF&: *this), |
894 | OriginalType: Type, |
895 | CopyGen: [this, VDInit, Init](Address DestElement, Address SrcElement) { |
896 | // Clean up any temporaries needed by the |
897 | // initialization. |
898 | RunCleanupsScope InitScope(*this); |
899 | // Emit initialization for single element. |
900 | setAddrOfLocalVar(VD: VDInit, Addr: SrcElement); |
901 | EmitAnyExprToMem(E: Init, Location: DestElement, |
902 | Quals: Init->getType().getQualifiers(), |
903 | /*IsInitializer*/ false); |
904 | LocalDeclMap.erase(VDInit); |
905 | }); |
906 | } |
907 | EmitAutoVarCleanups(emission: Emission); |
908 | IsRegistered = |
909 | PrivateScope.addPrivate(LocalVD: OrigVD, Addr: Emission.getAllocatedAddress()); |
910 | } else { |
911 | Address OriginalAddr = OriginalLVal.getAddress(CGF&: *this); |
912 | // Emit private VarDecl with copy init. |
913 | // Remap temp VDInit variable to the address of the original |
914 | // variable (for proper handling of captured global variables). |
915 | setAddrOfLocalVar(VD: VDInit, Addr: OriginalAddr); |
916 | EmitDecl(*VD); |
917 | LocalDeclMap.erase(VDInit); |
918 | Address VDAddr = GetAddrOfLocalVar(VD); |
919 | if (ThisFirstprivateIsLastprivate && |
920 | Lastprivates[OrigVD->getCanonicalDecl()] == |
921 | OMPC_LASTPRIVATE_conditional) { |
922 | // Create/init special variable for lastprivate conditionals. |
923 | llvm::Value *V = |
924 | EmitLoadOfScalar(MakeAddrLValue(VDAddr, (*IRef)->getType(), |
925 | AlignmentSource::Decl), |
926 | (*IRef)->getExprLoc()); |
927 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
928 | CGF&: *this, VD: OrigVD); |
929 | EmitStoreOfScalar(V, MakeAddrLValue(VDAddr, (*IRef)->getType(), |
930 | AlignmentSource::Decl)); |
931 | LocalDeclMap.erase(VD); |
932 | setAddrOfLocalVar(VD, Addr: VDAddr); |
933 | } |
934 | IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr); |
935 | } |
936 | assert(IsRegistered && |
937 | "firstprivate var already registered as private" ); |
938 | // Silence the warning about unused variable. |
939 | (void)IsRegistered; |
940 | } |
941 | ++IRef; |
942 | ++InitsRef; |
943 | } |
944 | } |
945 | return FirstprivateIsLastprivate && !EmittedAsFirstprivate.empty(); |
946 | } |
947 | |
948 | void CodeGenFunction::EmitOMPPrivateClause( |
949 | const OMPExecutableDirective &D, |
950 | CodeGenFunction::OMPPrivateScope &PrivateScope) { |
951 | if (!HaveInsertPoint()) |
952 | return; |
953 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
954 | for (const auto *C : D.getClausesOfKind<OMPPrivateClause>()) { |
955 | auto IRef = C->varlist_begin(); |
956 | for (const Expr *IInit : C->private_copies()) { |
957 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
958 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
959 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
960 | EmitDecl(*VD); |
961 | // Emit private VarDecl with copy init. |
962 | bool IsRegistered = |
963 | PrivateScope.addPrivate(LocalVD: OrigVD, Addr: GetAddrOfLocalVar(VD)); |
964 | assert(IsRegistered && "private var already registered as private" ); |
965 | // Silence the warning about unused variable. |
966 | (void)IsRegistered; |
967 | } |
968 | ++IRef; |
969 | } |
970 | } |
971 | } |
972 | |
973 | bool CodeGenFunction::EmitOMPCopyinClause(const OMPExecutableDirective &D) { |
974 | if (!HaveInsertPoint()) |
975 | return false; |
976 | // threadprivate_var1 = master_threadprivate_var1; |
977 | // operator=(threadprivate_var2, master_threadprivate_var2); |
978 | // ... |
979 | // __kmpc_barrier(&loc, global_tid); |
980 | llvm::DenseSet<const VarDecl *> CopiedVars; |
981 | llvm::BasicBlock *CopyBegin = nullptr, *CopyEnd = nullptr; |
982 | for (const auto *C : D.getClausesOfKind<OMPCopyinClause>()) { |
983 | auto IRef = C->varlist_begin(); |
984 | auto ISrcRef = C->source_exprs().begin(); |
985 | auto IDestRef = C->destination_exprs().begin(); |
986 | for (const Expr *AssignOp : C->assignment_ops()) { |
987 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
988 | QualType Type = VD->getType(); |
989 | if (CopiedVars.insert(VD->getCanonicalDecl()).second) { |
990 | // Get the address of the master variable. If we are emitting code with |
991 | // TLS support, the address is passed from the master as field in the |
992 | // captured declaration. |
993 | Address MasterAddr = Address::invalid(); |
994 | if (getLangOpts().OpenMPUseTLS && |
995 | getContext().getTargetInfo().isTLSSupported()) { |
996 | assert(CapturedStmtInfo->lookup(VD) && |
997 | "Copyin threadprivates should have been captured!" ); |
998 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), true, |
999 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1000 | MasterAddr = EmitLValue(&DRE).getAddress(CGF&: *this); |
1001 | LocalDeclMap.erase(VD); |
1002 | } else { |
1003 | MasterAddr = |
1004 | Address(VD->isStaticLocal() ? CGM.getStaticLocalDeclAddress(D: VD) |
1005 | : CGM.GetAddrOfGlobal(GD: VD), |
1006 | CGM.getTypes().ConvertTypeForMem(T: VD->getType()), |
1007 | getContext().getDeclAlign(D: VD)); |
1008 | } |
1009 | // Get the address of the threadprivate variable. |
1010 | Address PrivateAddr = EmitLValue(E: *IRef).getAddress(*this); |
1011 | if (CopiedVars.size() == 1) { |
1012 | // At first check if current thread is a master thread. If it is, no |
1013 | // need to copy data. |
1014 | CopyBegin = createBasicBlock(name: "copyin.not.master" ); |
1015 | CopyEnd = createBasicBlock(name: "copyin.not.master.end" ); |
1016 | // TODO: Avoid ptrtoint conversion. |
1017 | auto *MasterAddrInt = Builder.CreatePtrToInt( |
1018 | V: MasterAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy); |
1019 | auto *PrivateAddrInt = Builder.CreatePtrToInt( |
1020 | V: PrivateAddr.emitRawPointer(CGF&: *this), DestTy: CGM.IntPtrTy); |
1021 | Builder.CreateCondBr( |
1022 | Builder.CreateICmpNE(LHS: MasterAddrInt, RHS: PrivateAddrInt), CopyBegin, |
1023 | CopyEnd); |
1024 | EmitBlock(BB: CopyBegin); |
1025 | } |
1026 | const auto *SrcVD = |
1027 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl()); |
1028 | const auto *DestVD = |
1029 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1030 | EmitOMPCopy(OriginalType: Type, DestAddr: PrivateAddr, SrcAddr: MasterAddr, DestVD, SrcVD, Copy: AssignOp); |
1031 | } |
1032 | ++IRef; |
1033 | ++ISrcRef; |
1034 | ++IDestRef; |
1035 | } |
1036 | } |
1037 | if (CopyEnd) { |
1038 | // Exit out of copying procedure for non-master thread. |
1039 | EmitBlock(BB: CopyEnd, /*IsFinished=*/true); |
1040 | return true; |
1041 | } |
1042 | return false; |
1043 | } |
1044 | |
1045 | bool CodeGenFunction::EmitOMPLastprivateClauseInit( |
1046 | const OMPExecutableDirective &D, OMPPrivateScope &PrivateScope) { |
1047 | if (!HaveInsertPoint()) |
1048 | return false; |
1049 | bool HasAtLeastOneLastprivate = false; |
1050 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
1051 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
1052 | const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D); |
1053 | for (const Expr *C : LoopDirective->counters()) { |
1054 | SIMDLCVs.insert( |
1055 | V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl()); |
1056 | } |
1057 | } |
1058 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1059 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1060 | HasAtLeastOneLastprivate = true; |
1061 | if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) && |
1062 | !getLangOpts().OpenMPSimd) |
1063 | break; |
1064 | const auto *IRef = C->varlist_begin(); |
1065 | const auto *IDestRef = C->destination_exprs().begin(); |
1066 | for (const Expr *IInit : C->private_copies()) { |
1067 | // Keep the address of the original variable for future update at the end |
1068 | // of the loop. |
1069 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1070 | // Taskloops do not require additional initialization, it is done in |
1071 | // runtime support library. |
1072 | if (AlreadyEmittedVars.insert(OrigVD->getCanonicalDecl()).second) { |
1073 | const auto *DestVD = |
1074 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1075 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
1076 | /*RefersToEnclosingVariableOrCapture=*/ |
1077 | CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
1078 | (*IRef)->getType(), VK_LValue, (*IRef)->getExprLoc()); |
1079 | PrivateScope.addPrivate(LocalVD: DestVD, Addr: EmitLValue(&DRE).getAddress(CGF&: *this)); |
1080 | // Check if the variable is also a firstprivate: in this case IInit is |
1081 | // not generated. Initialization of this variable will happen in codegen |
1082 | // for 'firstprivate' clause. |
1083 | if (IInit && !SIMDLCVs.count(V: OrigVD->getCanonicalDecl())) { |
1084 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IInit)->getDecl()); |
1085 | Address VDAddr = Address::invalid(); |
1086 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) { |
1087 | VDAddr = CGM.getOpenMPRuntime().emitLastprivateConditionalInit( |
1088 | CGF&: *this, VD: OrigVD); |
1089 | setAddrOfLocalVar(VD, Addr: VDAddr); |
1090 | } else { |
1091 | // Emit private VarDecl with copy init. |
1092 | EmitDecl(*VD); |
1093 | VDAddr = GetAddrOfLocalVar(VD); |
1094 | } |
1095 | bool IsRegistered = PrivateScope.addPrivate(LocalVD: OrigVD, Addr: VDAddr); |
1096 | assert(IsRegistered && |
1097 | "lastprivate var already registered as private" ); |
1098 | (void)IsRegistered; |
1099 | } |
1100 | } |
1101 | ++IRef; |
1102 | ++IDestRef; |
1103 | } |
1104 | } |
1105 | return HasAtLeastOneLastprivate; |
1106 | } |
1107 | |
1108 | void CodeGenFunction::EmitOMPLastprivateClauseFinal( |
1109 | const OMPExecutableDirective &D, bool NoFinals, |
1110 | llvm::Value *IsLastIterCond) { |
1111 | if (!HaveInsertPoint()) |
1112 | return; |
1113 | // Emit following code: |
1114 | // if (<IsLastIterCond>) { |
1115 | // orig_var1 = private_orig_var1; |
1116 | // ... |
1117 | // orig_varn = private_orig_varn; |
1118 | // } |
1119 | llvm::BasicBlock *ThenBB = nullptr; |
1120 | llvm::BasicBlock *DoneBB = nullptr; |
1121 | if (IsLastIterCond) { |
1122 | // Emit implicit barrier if at least one lastprivate conditional is found |
1123 | // and this is not a simd mode. |
1124 | if (!getLangOpts().OpenMPSimd && |
1125 | llvm::any_of(Range: D.getClausesOfKind<OMPLastprivateClause>(), |
1126 | P: [](const OMPLastprivateClause *C) { |
1127 | return C->getKind() == OMPC_LASTPRIVATE_conditional; |
1128 | })) { |
1129 | CGM.getOpenMPRuntime().emitBarrierCall(*this, D.getBeginLoc(), |
1130 | OMPD_unknown, |
1131 | /*EmitChecks=*/false, |
1132 | /*ForceSimpleCall=*/true); |
1133 | } |
1134 | ThenBB = createBasicBlock(name: ".omp.lastprivate.then" ); |
1135 | DoneBB = createBasicBlock(name: ".omp.lastprivate.done" ); |
1136 | Builder.CreateCondBr(Cond: IsLastIterCond, True: ThenBB, False: DoneBB); |
1137 | EmitBlock(BB: ThenBB); |
1138 | } |
1139 | llvm::DenseSet<const VarDecl *> AlreadyEmittedVars; |
1140 | llvm::DenseMap<const VarDecl *, const Expr *> LoopCountersAndUpdates; |
1141 | if (const auto *LoopDirective = dyn_cast<OMPLoopDirective>(Val: &D)) { |
1142 | auto IC = LoopDirective->counters().begin(); |
1143 | for (const Expr *F : LoopDirective->finals()) { |
1144 | const auto *D = |
1145 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IC)->getDecl())->getCanonicalDecl(); |
1146 | if (NoFinals) |
1147 | AlreadyEmittedVars.insert(V: D); |
1148 | else |
1149 | LoopCountersAndUpdates[D] = F; |
1150 | ++IC; |
1151 | } |
1152 | } |
1153 | for (const auto *C : D.getClausesOfKind<OMPLastprivateClause>()) { |
1154 | auto IRef = C->varlist_begin(); |
1155 | auto ISrcRef = C->source_exprs().begin(); |
1156 | auto IDestRef = C->destination_exprs().begin(); |
1157 | for (const Expr *AssignOp : C->assignment_ops()) { |
1158 | const auto *PrivateVD = |
1159 | cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
1160 | QualType Type = PrivateVD->getType(); |
1161 | const auto *CanonicalVD = PrivateVD->getCanonicalDecl(); |
1162 | if (AlreadyEmittedVars.insert(CanonicalVD).second) { |
1163 | // If lastprivate variable is a loop control variable for loop-based |
1164 | // directive, update its value before copyin back to original |
1165 | // variable. |
1166 | if (const Expr *FinalExpr = LoopCountersAndUpdates.lookup(Val: CanonicalVD)) |
1167 | EmitIgnoredExpr(E: FinalExpr); |
1168 | const auto *SrcVD = |
1169 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ISrcRef)->getDecl()); |
1170 | const auto *DestVD = |
1171 | cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IDestRef)->getDecl()); |
1172 | // Get the address of the private variable. |
1173 | Address PrivateAddr = GetAddrOfLocalVar(VD: PrivateVD); |
1174 | if (const auto *RefTy = PrivateVD->getType()->getAs<ReferenceType>()) |
1175 | PrivateAddr = Address( |
1176 | Builder.CreateLoad(Addr: PrivateAddr), |
1177 | CGM.getTypes().ConvertTypeForMem(T: RefTy->getPointeeType()), |
1178 | CGM.getNaturalTypeAlignment(T: RefTy->getPointeeType())); |
1179 | // Store the last value to the private copy in the last iteration. |
1180 | if (C->getKind() == OMPC_LASTPRIVATE_conditional) |
1181 | CGM.getOpenMPRuntime().emitLastprivateConditionalFinalUpdate( |
1182 | CGF&: *this, PrivLVal: MakeAddrLValue(PrivateAddr, (*IRef)->getType()), VD: PrivateVD, |
1183 | Loc: (*IRef)->getExprLoc()); |
1184 | // Get the address of the original variable. |
1185 | Address OriginalAddr = GetAddrOfLocalVar(VD: DestVD); |
1186 | EmitOMPCopy(OriginalType: Type, DestAddr: OriginalAddr, SrcAddr: PrivateAddr, DestVD, SrcVD, Copy: AssignOp); |
1187 | } |
1188 | ++IRef; |
1189 | ++ISrcRef; |
1190 | ++IDestRef; |
1191 | } |
1192 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
1193 | EmitIgnoredExpr(E: PostUpdate); |
1194 | } |
1195 | if (IsLastIterCond) |
1196 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
1197 | } |
1198 | |
1199 | void CodeGenFunction::EmitOMPReductionClauseInit( |
1200 | const OMPExecutableDirective &D, |
1201 | CodeGenFunction::OMPPrivateScope &PrivateScope, bool ForInscan) { |
1202 | if (!HaveInsertPoint()) |
1203 | return; |
1204 | SmallVector<const Expr *, 4> Shareds; |
1205 | SmallVector<const Expr *, 4> Privates; |
1206 | SmallVector<const Expr *, 4> ReductionOps; |
1207 | SmallVector<const Expr *, 4> LHSs; |
1208 | SmallVector<const Expr *, 4> RHSs; |
1209 | OMPTaskDataTy Data; |
1210 | SmallVector<const Expr *, 4> TaskLHSs; |
1211 | SmallVector<const Expr *, 4> TaskRHSs; |
1212 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1213 | if (ForInscan != (C->getModifier() == OMPC_REDUCTION_inscan)) |
1214 | continue; |
1215 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
1216 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1217 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
1218 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1219 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1220 | if (C->getModifier() == OMPC_REDUCTION_task) { |
1221 | Data.ReductionVars.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1222 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
1223 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1224 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
1225 | in_end: C->reduction_ops().end()); |
1226 | TaskLHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1227 | TaskRHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1228 | } |
1229 | } |
1230 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
1231 | unsigned Count = 0; |
1232 | auto *ILHS = LHSs.begin(); |
1233 | auto *IRHS = RHSs.begin(); |
1234 | auto *IPriv = Privates.begin(); |
1235 | for (const Expr *IRef : Shareds) { |
1236 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IPriv)->getDecl()); |
1237 | // Emit private VarDecl with reduction init. |
1238 | RedCG.emitSharedOrigLValue(CGF&: *this, N: Count); |
1239 | RedCG.emitAggregateType(CGF&: *this, N: Count); |
1240 | AutoVarEmission Emission = EmitAutoVarAlloca(var: *PrivateVD); |
1241 | RedCG.emitInitialization(CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress(), |
1242 | SharedAddr: RedCG.getSharedLValue(N: Count).getAddress(CGF&: *this), |
1243 | DefaultInit: [&Emission](CodeGenFunction &CGF) { |
1244 | CGF.EmitAutoVarInit(emission: Emission); |
1245 | return true; |
1246 | }); |
1247 | EmitAutoVarCleanups(emission: Emission); |
1248 | Address BaseAddr = RedCG.adjustPrivateAddress( |
1249 | CGF&: *this, N: Count, PrivateAddr: Emission.getAllocatedAddress()); |
1250 | bool IsRegistered = |
1251 | PrivateScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Count), Addr: BaseAddr); |
1252 | assert(IsRegistered && "private var already registered as private" ); |
1253 | // Silence the warning about unused variable. |
1254 | (void)IsRegistered; |
1255 | |
1256 | const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl()); |
1257 | const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl()); |
1258 | QualType Type = PrivateVD->getType(); |
1259 | bool isaOMPArraySectionExpr = isa<OMPArraySectionExpr>(Val: IRef); |
1260 | if (isaOMPArraySectionExpr && Type->isVariablyModifiedType()) { |
1261 | // Store the address of the original variable associated with the LHS |
1262 | // implicit variable. |
1263 | PrivateScope.addPrivate(LocalVD: LHSVD, |
1264 | Addr: RedCG.getSharedLValue(N: Count).getAddress(CGF&: *this)); |
1265 | PrivateScope.addPrivate(LocalVD: RHSVD, Addr: GetAddrOfLocalVar(VD: PrivateVD)); |
1266 | } else if ((isaOMPArraySectionExpr && Type->isScalarType()) || |
1267 | isa<ArraySubscriptExpr>(Val: IRef)) { |
1268 | // Store the address of the original variable associated with the LHS |
1269 | // implicit variable. |
1270 | PrivateScope.addPrivate(LocalVD: LHSVD, |
1271 | Addr: RedCG.getSharedLValue(N: Count).getAddress(CGF&: *this)); |
1272 | PrivateScope.addPrivate(LocalVD: RHSVD, |
1273 | Addr: GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1274 | ElemTy: ConvertTypeForMem(T: RHSVD->getType()))); |
1275 | } else { |
1276 | QualType Type = PrivateVD->getType(); |
1277 | bool IsArray = getContext().getAsArrayType(T: Type) != nullptr; |
1278 | Address OriginalAddr = RedCG.getSharedLValue(N: Count).getAddress(CGF&: *this); |
1279 | // Store the address of the original variable associated with the LHS |
1280 | // implicit variable. |
1281 | if (IsArray) { |
1282 | OriginalAddr = |
1283 | OriginalAddr.withElementType(ElemTy: ConvertTypeForMem(T: LHSVD->getType())); |
1284 | } |
1285 | PrivateScope.addPrivate(LocalVD: LHSVD, Addr: OriginalAddr); |
1286 | PrivateScope.addPrivate( |
1287 | LocalVD: RHSVD, Addr: IsArray ? GetAddrOfLocalVar(VD: PrivateVD).withElementType( |
1288 | ElemTy: ConvertTypeForMem(T: RHSVD->getType())) |
1289 | : GetAddrOfLocalVar(VD: PrivateVD)); |
1290 | } |
1291 | ++ILHS; |
1292 | ++IRHS; |
1293 | ++IPriv; |
1294 | ++Count; |
1295 | } |
1296 | if (!Data.ReductionVars.empty()) { |
1297 | Data.IsReductionWithTaskMod = true; |
1298 | Data.IsWorksharingReduction = |
1299 | isOpenMPWorksharingDirective(D.getDirectiveKind()); |
1300 | llvm::Value *ReductionDesc = CGM.getOpenMPRuntime().emitTaskReductionInit( |
1301 | CGF&: *this, Loc: D.getBeginLoc(), LHSExprs: TaskLHSs, RHSExprs: TaskRHSs, Data); |
1302 | const Expr *TaskRedRef = nullptr; |
1303 | switch (D.getDirectiveKind()) { |
1304 | case OMPD_parallel: |
1305 | TaskRedRef = cast<OMPParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1306 | break; |
1307 | case OMPD_for: |
1308 | TaskRedRef = cast<OMPForDirective>(Val: D).getTaskReductionRefExpr(); |
1309 | break; |
1310 | case OMPD_sections: |
1311 | TaskRedRef = cast<OMPSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1312 | break; |
1313 | case OMPD_parallel_for: |
1314 | TaskRedRef = cast<OMPParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1315 | break; |
1316 | case OMPD_parallel_master: |
1317 | TaskRedRef = |
1318 | cast<OMPParallelMasterDirective>(Val: D).getTaskReductionRefExpr(); |
1319 | break; |
1320 | case OMPD_parallel_sections: |
1321 | TaskRedRef = |
1322 | cast<OMPParallelSectionsDirective>(Val: D).getTaskReductionRefExpr(); |
1323 | break; |
1324 | case OMPD_target_parallel: |
1325 | TaskRedRef = |
1326 | cast<OMPTargetParallelDirective>(Val: D).getTaskReductionRefExpr(); |
1327 | break; |
1328 | case OMPD_target_parallel_for: |
1329 | TaskRedRef = |
1330 | cast<OMPTargetParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1331 | break; |
1332 | case OMPD_distribute_parallel_for: |
1333 | TaskRedRef = |
1334 | cast<OMPDistributeParallelForDirective>(Val: D).getTaskReductionRefExpr(); |
1335 | break; |
1336 | case OMPD_teams_distribute_parallel_for: |
1337 | TaskRedRef = cast<OMPTeamsDistributeParallelForDirective>(Val: D) |
1338 | .getTaskReductionRefExpr(); |
1339 | break; |
1340 | case OMPD_target_teams_distribute_parallel_for: |
1341 | TaskRedRef = cast<OMPTargetTeamsDistributeParallelForDirective>(Val: D) |
1342 | .getTaskReductionRefExpr(); |
1343 | break; |
1344 | case OMPD_simd: |
1345 | case OMPD_for_simd: |
1346 | case OMPD_section: |
1347 | case OMPD_single: |
1348 | case OMPD_master: |
1349 | case OMPD_critical: |
1350 | case OMPD_parallel_for_simd: |
1351 | case OMPD_task: |
1352 | case OMPD_taskyield: |
1353 | case OMPD_error: |
1354 | case OMPD_barrier: |
1355 | case OMPD_taskwait: |
1356 | case OMPD_taskgroup: |
1357 | case OMPD_flush: |
1358 | case OMPD_depobj: |
1359 | case OMPD_scan: |
1360 | case OMPD_ordered: |
1361 | case OMPD_atomic: |
1362 | case OMPD_teams: |
1363 | case OMPD_target: |
1364 | case OMPD_cancellation_point: |
1365 | case OMPD_cancel: |
1366 | case OMPD_target_data: |
1367 | case OMPD_target_enter_data: |
1368 | case OMPD_target_exit_data: |
1369 | case OMPD_taskloop: |
1370 | case OMPD_taskloop_simd: |
1371 | case OMPD_master_taskloop: |
1372 | case OMPD_master_taskloop_simd: |
1373 | case OMPD_parallel_master_taskloop: |
1374 | case OMPD_parallel_master_taskloop_simd: |
1375 | case OMPD_distribute: |
1376 | case OMPD_target_update: |
1377 | case OMPD_distribute_parallel_for_simd: |
1378 | case OMPD_distribute_simd: |
1379 | case OMPD_target_parallel_for_simd: |
1380 | case OMPD_target_simd: |
1381 | case OMPD_teams_distribute: |
1382 | case OMPD_teams_distribute_simd: |
1383 | case OMPD_teams_distribute_parallel_for_simd: |
1384 | case OMPD_target_teams: |
1385 | case OMPD_target_teams_distribute: |
1386 | case OMPD_target_teams_distribute_parallel_for_simd: |
1387 | case OMPD_target_teams_distribute_simd: |
1388 | case OMPD_declare_target: |
1389 | case OMPD_end_declare_target: |
1390 | case OMPD_threadprivate: |
1391 | case OMPD_allocate: |
1392 | case OMPD_declare_reduction: |
1393 | case OMPD_declare_mapper: |
1394 | case OMPD_declare_simd: |
1395 | case OMPD_requires: |
1396 | case OMPD_declare_variant: |
1397 | case OMPD_begin_declare_variant: |
1398 | case OMPD_end_declare_variant: |
1399 | case OMPD_unknown: |
1400 | default: |
1401 | llvm_unreachable("Enexpected directive with task reductions." ); |
1402 | } |
1403 | |
1404 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TaskRedRef)->getDecl()); |
1405 | EmitVarDecl(D: *VD); |
1406 | EmitStoreOfScalar(Value: ReductionDesc, Addr: GetAddrOfLocalVar(VD), |
1407 | /*Volatile=*/false, Ty: TaskRedRef->getType()); |
1408 | } |
1409 | } |
1410 | |
1411 | void CodeGenFunction::EmitOMPReductionClauseFinal( |
1412 | const OMPExecutableDirective &D, const OpenMPDirectiveKind ReductionKind) { |
1413 | if (!HaveInsertPoint()) |
1414 | return; |
1415 | llvm::SmallVector<const Expr *, 8> Privates; |
1416 | llvm::SmallVector<const Expr *, 8> LHSExprs; |
1417 | llvm::SmallVector<const Expr *, 8> RHSExprs; |
1418 | llvm::SmallVector<const Expr *, 8> ReductionOps; |
1419 | bool HasAtLeastOneReduction = false; |
1420 | bool IsReductionWithTaskMod = false; |
1421 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1422 | // Do not emit for inscan reductions. |
1423 | if (C->getModifier() == OMPC_REDUCTION_inscan) |
1424 | continue; |
1425 | HasAtLeastOneReduction = true; |
1426 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
1427 | LHSExprs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
1428 | RHSExprs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
1429 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
1430 | IsReductionWithTaskMod = |
1431 | IsReductionWithTaskMod || C->getModifier() == OMPC_REDUCTION_task; |
1432 | } |
1433 | if (HasAtLeastOneReduction) { |
1434 | if (IsReductionWithTaskMod) { |
1435 | CGM.getOpenMPRuntime().emitTaskReductionFini( |
1436 | *this, D.getBeginLoc(), |
1437 | isOpenMPWorksharingDirective(D.getDirectiveKind())); |
1438 | } |
1439 | bool TeamsLoopCanBeParallel = false; |
1440 | if (auto *TTLD = dyn_cast<OMPTargetTeamsGenericLoopDirective>(Val: &D)) |
1441 | TeamsLoopCanBeParallel = TTLD->canBeParallelFor(); |
1442 | bool WithNowait = D.getSingleClause<OMPNowaitClause>() || |
1443 | isOpenMPParallelDirective(D.getDirectiveKind()) || |
1444 | TeamsLoopCanBeParallel || ReductionKind == OMPD_simd; |
1445 | bool SimpleReduction = ReductionKind == OMPD_simd; |
1446 | // Emit nowait reduction if nowait clause is present or directive is a |
1447 | // parallel directive (it always has implicit barrier). |
1448 | CGM.getOpenMPRuntime().emitReduction( |
1449 | CGF&: *this, Loc: D.getEndLoc(), Privates, LHSExprs, RHSExprs, ReductionOps, |
1450 | Options: {WithNowait, SimpleReduction, ReductionKind}); |
1451 | } |
1452 | } |
1453 | |
1454 | static void emitPostUpdateForReductionClause( |
1455 | CodeGenFunction &CGF, const OMPExecutableDirective &D, |
1456 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
1457 | if (!CGF.HaveInsertPoint()) |
1458 | return; |
1459 | llvm::BasicBlock *DoneBB = nullptr; |
1460 | for (const auto *C : D.getClausesOfKind<OMPReductionClause>()) { |
1461 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) { |
1462 | if (!DoneBB) { |
1463 | if (llvm::Value *Cond = CondGen(CGF)) { |
1464 | // If the first post-update expression is found, emit conditional |
1465 | // block if it was requested. |
1466 | llvm::BasicBlock *ThenBB = CGF.createBasicBlock(name: ".omp.reduction.pu" ); |
1467 | DoneBB = CGF.createBasicBlock(name: ".omp.reduction.pu.done" ); |
1468 | CGF.Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
1469 | CGF.EmitBlock(BB: ThenBB); |
1470 | } |
1471 | } |
1472 | CGF.EmitIgnoredExpr(E: PostUpdate); |
1473 | } |
1474 | } |
1475 | if (DoneBB) |
1476 | CGF.EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
1477 | } |
1478 | |
1479 | namespace { |
1480 | /// Codegen lambda for appending distribute lower and upper bounds to outlined |
1481 | /// parallel function. This is necessary for combined constructs such as |
1482 | /// 'distribute parallel for' |
1483 | typedef llvm::function_ref<void(CodeGenFunction &, |
1484 | const OMPExecutableDirective &, |
1485 | llvm::SmallVectorImpl<llvm::Value *> &)> |
1486 | CodeGenBoundParametersTy; |
1487 | } // anonymous namespace |
1488 | |
1489 | static void |
1490 | checkForLastprivateConditionalUpdate(CodeGenFunction &CGF, |
1491 | const OMPExecutableDirective &S) { |
1492 | if (CGF.getLangOpts().OpenMP < 50) |
1493 | return; |
1494 | llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> PrivateDecls; |
1495 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
1496 | for (const Expr *Ref : C->varlists()) { |
1497 | if (!Ref->getType()->isScalarType()) |
1498 | continue; |
1499 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1500 | if (!DRE) |
1501 | continue; |
1502 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1503 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1504 | } |
1505 | } |
1506 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
1507 | for (const Expr *Ref : C->varlists()) { |
1508 | if (!Ref->getType()->isScalarType()) |
1509 | continue; |
1510 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1511 | if (!DRE) |
1512 | continue; |
1513 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1514 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1515 | } |
1516 | } |
1517 | for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) { |
1518 | for (const Expr *Ref : C->varlists()) { |
1519 | if (!Ref->getType()->isScalarType()) |
1520 | continue; |
1521 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1522 | if (!DRE) |
1523 | continue; |
1524 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1525 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, Ref); |
1526 | } |
1527 | } |
1528 | // Privates should ne analyzed since they are not captured at all. |
1529 | // Task reductions may be skipped - tasks are ignored. |
1530 | // Firstprivates do not return value but may be passed by reference - no need |
1531 | // to check for updated lastprivate conditional. |
1532 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
1533 | for (const Expr *Ref : C->varlists()) { |
1534 | if (!Ref->getType()->isScalarType()) |
1535 | continue; |
1536 | const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
1537 | if (!DRE) |
1538 | continue; |
1539 | PrivateDecls.insert(cast<VarDecl>(DRE->getDecl())); |
1540 | } |
1541 | } |
1542 | CGF.CGM.getOpenMPRuntime().checkAndEmitSharedLastprivateConditional( |
1543 | CGF, D: S, IgnoredDecls: PrivateDecls); |
1544 | } |
1545 | |
1546 | static void emitCommonOMPParallelDirective( |
1547 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
1548 | OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen, |
1549 | const CodeGenBoundParametersTy &CodeGenBoundParameters) { |
1550 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1551 | llvm::Value *NumThreads = nullptr; |
1552 | llvm::Function *OutlinedFn = |
1553 | CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction( |
1554 | CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, |
1555 | CodeGen); |
1556 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) { |
1557 | CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); |
1558 | NumThreads = CGF.EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1559 | /*IgnoreResultAssign=*/true); |
1560 | CGF.CGM.getOpenMPRuntime().emitNumThreadsClause( |
1561 | CGF, NumThreads, Loc: NumThreadsClause->getBeginLoc()); |
1562 | } |
1563 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) { |
1564 | CodeGenFunction::RunCleanupsScope ProcBindScope(CGF); |
1565 | CGF.CGM.getOpenMPRuntime().emitProcBindClause( |
1566 | CGF, ProcBindClause->getProcBindKind(), ProcBindClause->getBeginLoc()); |
1567 | } |
1568 | const Expr *IfCond = nullptr; |
1569 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
1570 | if (C->getNameModifier() == OMPD_unknown || |
1571 | C->getNameModifier() == OMPD_parallel) { |
1572 | IfCond = C->getCondition(); |
1573 | break; |
1574 | } |
1575 | } |
1576 | |
1577 | OMPParallelScope Scope(CGF, S); |
1578 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
1579 | // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk |
1580 | // lower and upper bounds with the pragma 'for' chunking mechanism. |
1581 | // The following lambda takes care of appending the lower and upper bound |
1582 | // parameters when necessary |
1583 | CodeGenBoundParameters(CGF, S, CapturedVars); |
1584 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
1585 | CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, Loc: S.getBeginLoc(), OutlinedFn, |
1586 | CapturedVars, IfCond, NumThreads); |
1587 | } |
1588 | |
1589 | static bool isAllocatableDecl(const VarDecl *VD) { |
1590 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1591 | if (!CVD->hasAttr<OMPAllocateDeclAttr>()) |
1592 | return false; |
1593 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1594 | // Use the default allocation. |
1595 | return !((AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc || |
1596 | AA->getAllocatorType() == OMPAllocateDeclAttr::OMPNullMemAlloc) && |
1597 | !AA->getAllocator()); |
1598 | } |
1599 | |
1600 | static void emitEmptyBoundParameters(CodeGenFunction &, |
1601 | const OMPExecutableDirective &, |
1602 | llvm::SmallVectorImpl<llvm::Value *> &) {} |
1603 | |
1604 | static void emitOMPCopyinClause(CodeGenFunction &CGF, |
1605 | const OMPExecutableDirective &S) { |
1606 | bool Copyins = CGF.EmitOMPCopyinClause(D: S); |
1607 | if (Copyins) { |
1608 | // Emit implicit barrier to synchronize threads and avoid data races on |
1609 | // propagation master's thread values of threadprivate variables to local |
1610 | // instances of that variables of all other implicit threads. |
1611 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
1612 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
1613 | /*ForceSimpleCall=*/true); |
1614 | } |
1615 | } |
1616 | |
1617 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddressOfLocalVariable( |
1618 | CodeGenFunction &CGF, const VarDecl *VD) { |
1619 | CodeGenModule &CGM = CGF.CGM; |
1620 | auto &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1621 | |
1622 | if (!VD) |
1623 | return Address::invalid(); |
1624 | const VarDecl *CVD = VD->getCanonicalDecl(); |
1625 | if (!isAllocatableDecl(VD: CVD)) |
1626 | return Address::invalid(); |
1627 | llvm::Value *Size; |
1628 | CharUnits Align = CGM.getContext().getDeclAlign(CVD); |
1629 | if (CVD->getType()->isVariablyModifiedType()) { |
1630 | Size = CGF.getTypeSize(Ty: CVD->getType()); |
1631 | // Align the size: ((size + align - 1) / align) * align |
1632 | Size = CGF.Builder.CreateNUWAdd( |
1633 | LHS: Size, RHS: CGM.getSize(numChars: Align - CharUnits::fromQuantity(Quantity: 1))); |
1634 | Size = CGF.Builder.CreateUDiv(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
1635 | Size = CGF.Builder.CreateNUWMul(LHS: Size, RHS: CGM.getSize(numChars: Align)); |
1636 | } else { |
1637 | CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType()); |
1638 | Size = CGM.getSize(numChars: Sz.alignTo(Align)); |
1639 | } |
1640 | |
1641 | const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>(); |
1642 | assert(AA->getAllocator() && |
1643 | "Expected allocator expression for non-default allocator." ); |
1644 | llvm::Value *Allocator = CGF.EmitScalarExpr(E: AA->getAllocator()); |
1645 | // According to the standard, the original allocator type is a enum (integer). |
1646 | // Convert to pointer type, if required. |
1647 | if (Allocator->getType()->isIntegerTy()) |
1648 | Allocator = CGF.Builder.CreateIntToPtr(V: Allocator, DestTy: CGM.VoidPtrTy); |
1649 | else if (Allocator->getType()->isPointerTy()) |
1650 | Allocator = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(V: Allocator, |
1651 | DestTy: CGM.VoidPtrTy); |
1652 | |
1653 | llvm::Value *Addr = OMPBuilder.createOMPAlloc( |
1654 | Loc: CGF.Builder, Size, Allocator, |
1655 | Name: getNameWithSeparators(Parts: {CVD->getName(), ".void.addr" }, FirstSeparator: "." , Separator: "." )); |
1656 | llvm::CallInst *FreeCI = |
1657 | OMPBuilder.createOMPFree(Loc: CGF.Builder, Addr, Allocator); |
1658 | |
1659 | CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(Kind: NormalAndEHCleanup, A: FreeCI); |
1660 | Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast( |
1661 | Addr, |
1662 | CGF.ConvertTypeForMem(T: CGM.getContext().getPointerType(CVD->getType())), |
1663 | getNameWithSeparators(Parts: {CVD->getName(), ".addr" }, FirstSeparator: "." , Separator: "." )); |
1664 | return Address(Addr, CGF.ConvertTypeForMem(T: CVD->getType()), Align); |
1665 | } |
1666 | |
1667 | Address CodeGenFunction::OMPBuilderCBHelpers::getAddrOfThreadPrivate( |
1668 | CodeGenFunction &CGF, const VarDecl *VD, Address VDAddr, |
1669 | SourceLocation Loc) { |
1670 | CodeGenModule &CGM = CGF.CGM; |
1671 | if (CGM.getLangOpts().OpenMPUseTLS && |
1672 | CGM.getContext().getTargetInfo().isTLSSupported()) |
1673 | return VDAddr; |
1674 | |
1675 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1676 | |
1677 | llvm::Type *VarTy = VDAddr.getElementType(); |
1678 | llvm::Value *Data = |
1679 | CGF.Builder.CreatePointerCast(V: VDAddr.emitRawPointer(CGF), DestTy: CGM.Int8PtrTy); |
1680 | llvm::ConstantInt *Size = CGM.getSize(numChars: CGM.GetTargetTypeStoreSize(Ty: VarTy)); |
1681 | std::string Suffix = getNameWithSeparators(Parts: {"cache" , "" }); |
1682 | llvm::Twine CacheName = Twine(CGM.getMangledName(GD: VD)).concat(Suffix); |
1683 | |
1684 | llvm::CallInst *ThreadPrivateCacheCall = |
1685 | OMPBuilder.createCachedThreadPrivate(Loc: CGF.Builder, Pointer: Data, Size, Name: CacheName); |
1686 | |
1687 | return Address(ThreadPrivateCacheCall, CGM.Int8Ty, VDAddr.getAlignment()); |
1688 | } |
1689 | |
1690 | std::string CodeGenFunction::OMPBuilderCBHelpers::getNameWithSeparators( |
1691 | ArrayRef<StringRef> Parts, StringRef FirstSeparator, StringRef Separator) { |
1692 | SmallString<128> Buffer; |
1693 | llvm::raw_svector_ostream OS(Buffer); |
1694 | StringRef Sep = FirstSeparator; |
1695 | for (StringRef Part : Parts) { |
1696 | OS << Sep << Part; |
1697 | Sep = Separator; |
1698 | } |
1699 | return OS.str().str(); |
1700 | } |
1701 | |
1702 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
1703 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1704 | InsertPointTy CodeGenIP, Twine RegionName) { |
1705 | CGBuilderTy &Builder = CGF.Builder; |
1706 | Builder.restoreIP(IP: CodeGenIP); |
1707 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1708 | Suffix: "." + RegionName + ".after" ); |
1709 | |
1710 | { |
1711 | OMPBuilderCBHelpers::InlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1712 | CGF.EmitStmt(S: RegionBodyStmt); |
1713 | } |
1714 | |
1715 | if (Builder.saveIP().isSet()) |
1716 | Builder.CreateBr(Dest: FiniBB); |
1717 | } |
1718 | |
1719 | void CodeGenFunction::OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1720 | CodeGenFunction &CGF, const Stmt *RegionBodyStmt, InsertPointTy AllocaIP, |
1721 | InsertPointTy CodeGenIP, Twine RegionName) { |
1722 | CGBuilderTy &Builder = CGF.Builder; |
1723 | Builder.restoreIP(IP: CodeGenIP); |
1724 | llvm::BasicBlock *FiniBB = splitBBWithSuffix(Builder, /*CreateBranch=*/false, |
1725 | Suffix: "." + RegionName + ".after" ); |
1726 | |
1727 | { |
1728 | OMPBuilderCBHelpers::OutlinedRegionBodyRAII IRB(CGF, AllocaIP, *FiniBB); |
1729 | CGF.EmitStmt(S: RegionBodyStmt); |
1730 | } |
1731 | |
1732 | if (Builder.saveIP().isSet()) |
1733 | Builder.CreateBr(Dest: FiniBB); |
1734 | } |
1735 | |
1736 | void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) { |
1737 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
1738 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
1739 | // Check if we have any if clause associated with the directive. |
1740 | llvm::Value *IfCond = nullptr; |
1741 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
1742 | IfCond = EmitScalarExpr(E: C->getCondition(), |
1743 | /*IgnoreResultAssign=*/true); |
1744 | |
1745 | llvm::Value *NumThreads = nullptr; |
1746 | if (const auto *NumThreadsClause = S.getSingleClause<OMPNumThreadsClause>()) |
1747 | NumThreads = EmitScalarExpr(E: NumThreadsClause->getNumThreads(), |
1748 | /*IgnoreResultAssign=*/true); |
1749 | |
1750 | ProcBindKind ProcBind = OMP_PROC_BIND_default; |
1751 | if (const auto *ProcBindClause = S.getSingleClause<OMPProcBindClause>()) |
1752 | ProcBind = ProcBindClause->getProcBindKind(); |
1753 | |
1754 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
1755 | |
1756 | // The cleanup callback that finalizes all variabels at the given location, |
1757 | // thus calls destructors etc. |
1758 | auto FiniCB = [this](InsertPointTy IP) { |
1759 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
1760 | }; |
1761 | |
1762 | // Privatization callback that performs appropriate action for |
1763 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
1764 | // |
1765 | // TODO: This defaults to shared right now. |
1766 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
1767 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
1768 | // The next line is appropriate only for variables (Val) with the |
1769 | // data-sharing attribute "shared". |
1770 | ReplVal = &Val; |
1771 | |
1772 | return CodeGenIP; |
1773 | }; |
1774 | |
1775 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
1776 | const Stmt *ParallelRegionBodyStmt = CS->getCapturedStmt(); |
1777 | |
1778 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
1779 | InsertPointTy CodeGenIP) { |
1780 | OMPBuilderCBHelpers::EmitOMPOutlinedRegionBody( |
1781 | CGF&: *this, RegionBodyStmt: ParallelRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "parallel" ); |
1782 | }; |
1783 | |
1784 | CGCapturedStmtInfo CGSI(*CS, CR_OpenMP); |
1785 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
1786 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
1787 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
1788 | Builder.restoreIP( |
1789 | IP: OMPBuilder.createParallel(Builder, AllocaIP, BodyGenCB, PrivCB, FiniCB, |
1790 | IfCond, NumThreads, ProcBind, S.hasCancel())); |
1791 | return; |
1792 | } |
1793 | |
1794 | // Emit parallel region as a standalone region. |
1795 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
1796 | Action.Enter(CGF); |
1797 | OMPPrivateScope PrivateScope(CGF); |
1798 | emitOMPCopyinClause(CGF, S); |
1799 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
1800 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
1801 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
1802 | (void)PrivateScope.Privatize(); |
1803 | CGF.EmitStmt(S.getCapturedStmt(OMPD_parallel)->getCapturedStmt()); |
1804 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
1805 | }; |
1806 | { |
1807 | auto LPCRegion = |
1808 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
1809 | emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen, |
1810 | emitEmptyBoundParameters); |
1811 | emitPostUpdateForReductionClause(*this, S, |
1812 | [](CodeGenFunction &) { return nullptr; }); |
1813 | } |
1814 | // Check for outer lastprivate conditional update. |
1815 | checkForLastprivateConditionalUpdate(*this, S); |
1816 | } |
1817 | |
1818 | void CodeGenFunction::EmitOMPMetaDirective(const OMPMetaDirective &S) { |
1819 | EmitStmt(S: S.getIfStmt()); |
1820 | } |
1821 | |
1822 | namespace { |
1823 | /// RAII to handle scopes for loop transformation directives. |
1824 | class OMPTransformDirectiveScopeRAII { |
1825 | OMPLoopScope *Scope = nullptr; |
1826 | CodeGenFunction::CGCapturedStmtInfo *CGSI = nullptr; |
1827 | CodeGenFunction::CGCapturedStmtRAII *CapInfoRAII = nullptr; |
1828 | |
1829 | OMPTransformDirectiveScopeRAII(const OMPTransformDirectiveScopeRAII &) = |
1830 | delete; |
1831 | OMPTransformDirectiveScopeRAII & |
1832 | operator=(const OMPTransformDirectiveScopeRAII &) = delete; |
1833 | |
1834 | public: |
1835 | OMPTransformDirectiveScopeRAII(CodeGenFunction &CGF, const Stmt *S) { |
1836 | if (const auto *Dir = dyn_cast<OMPLoopBasedDirective>(Val: S)) { |
1837 | Scope = new OMPLoopScope(CGF, *Dir); |
1838 | CGSI = new CodeGenFunction::CGCapturedStmtInfo(CR_OpenMP); |
1839 | CapInfoRAII = new CodeGenFunction::CGCapturedStmtRAII(CGF, CGSI); |
1840 | } |
1841 | } |
1842 | ~OMPTransformDirectiveScopeRAII() { |
1843 | if (!Scope) |
1844 | return; |
1845 | delete CapInfoRAII; |
1846 | delete CGSI; |
1847 | delete Scope; |
1848 | } |
1849 | }; |
1850 | } // namespace |
1851 | |
1852 | static void emitBody(CodeGenFunction &CGF, const Stmt *S, const Stmt *NextLoop, |
1853 | int MaxLevel, int Level = 0) { |
1854 | assert(Level < MaxLevel && "Too deep lookup during loop body codegen." ); |
1855 | const Stmt *SimplifiedS = S->IgnoreContainers(); |
1856 | if (const auto *CS = dyn_cast<CompoundStmt>(Val: SimplifiedS)) { |
1857 | PrettyStackTraceLoc CrashInfo( |
1858 | CGF.getContext().getSourceManager(), CS->getLBracLoc(), |
1859 | "LLVM IR generation of compound statement ('{}')" ); |
1860 | |
1861 | // Keep track of the current cleanup stack depth, including debug scopes. |
1862 | CodeGenFunction::LexicalScope Scope(CGF, S->getSourceRange()); |
1863 | for (const Stmt *CurStmt : CS->body()) |
1864 | emitBody(CGF, S: CurStmt, NextLoop, MaxLevel, Level); |
1865 | return; |
1866 | } |
1867 | if (SimplifiedS == NextLoop) { |
1868 | if (auto *Dir = dyn_cast<OMPLoopTransformationDirective>(Val: SimplifiedS)) |
1869 | SimplifiedS = Dir->getTransformedStmt(); |
1870 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(Val: SimplifiedS)) |
1871 | SimplifiedS = CanonLoop->getLoopStmt(); |
1872 | if (const auto *For = dyn_cast<ForStmt>(Val: SimplifiedS)) { |
1873 | S = For->getBody(); |
1874 | } else { |
1875 | assert(isa<CXXForRangeStmt>(SimplifiedS) && |
1876 | "Expected canonical for loop or range-based for loop." ); |
1877 | const auto *CXXFor = cast<CXXForRangeStmt>(Val: SimplifiedS); |
1878 | CGF.EmitStmt(S: CXXFor->getLoopVarStmt()); |
1879 | S = CXXFor->getBody(); |
1880 | } |
1881 | if (Level + 1 < MaxLevel) { |
1882 | NextLoop = OMPLoopDirective::tryToFindNextInnerLoop( |
1883 | CurStmt: S, /*TryImperfectlyNestedLoops=*/true); |
1884 | emitBody(CGF, S, NextLoop, MaxLevel, Level: Level + 1); |
1885 | return; |
1886 | } |
1887 | } |
1888 | CGF.EmitStmt(S); |
1889 | } |
1890 | |
1891 | void CodeGenFunction::EmitOMPLoopBody(const OMPLoopDirective &D, |
1892 | JumpDest LoopExit) { |
1893 | RunCleanupsScope BodyScope(*this); |
1894 | // Update counters values on current iteration. |
1895 | for (const Expr *UE : D.updates()) |
1896 | EmitIgnoredExpr(E: UE); |
1897 | // Update the linear variables. |
1898 | // In distribute directives only loop counters may be marked as linear, no |
1899 | // need to generate the code for them. |
1900 | if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { |
1901 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
1902 | for (const Expr *UE : C->updates()) |
1903 | EmitIgnoredExpr(UE); |
1904 | } |
1905 | } |
1906 | |
1907 | // On a continue in the body, jump to the end. |
1908 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.body.continue" ); |
1909 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
1910 | for (const Expr *E : D.finals_conditions()) { |
1911 | if (!E) |
1912 | continue; |
1913 | // Check that loop counter in non-rectangular nest fits into the iteration |
1914 | // space. |
1915 | llvm::BasicBlock *NextBB = createBasicBlock(name: "omp.body.next" ); |
1916 | EmitBranchOnBoolExpr(Cond: E, TrueBlock: NextBB, FalseBlock: Continue.getBlock(), |
1917 | TrueCount: getProfileCount(S: D.getBody())); |
1918 | EmitBlock(BB: NextBB); |
1919 | } |
1920 | |
1921 | OMPPrivateScope InscanScope(*this); |
1922 | EmitOMPReductionClauseInit(D, InscanScope, /*ForInscan=*/true); |
1923 | bool IsInscanRegion = InscanScope.Privatize(); |
1924 | if (IsInscanRegion) { |
1925 | // Need to remember the block before and after scan directive |
1926 | // to dispatch them correctly depending on the clause used in |
1927 | // this directive, inclusive or exclusive. For inclusive scan the natural |
1928 | // order of the blocks is used, for exclusive clause the blocks must be |
1929 | // executed in reverse order. |
1930 | OMPBeforeScanBlock = createBasicBlock(name: "omp.before.scan.bb" ); |
1931 | OMPAfterScanBlock = createBasicBlock(name: "omp.after.scan.bb" ); |
1932 | // No need to allocate inscan exit block, in simd mode it is selected in the |
1933 | // codegen for the scan directive. |
1934 | if (D.getDirectiveKind() != OMPD_simd && !getLangOpts().OpenMPSimd) |
1935 | OMPScanExitBlock = createBasicBlock(name: "omp.exit.inscan.bb" ); |
1936 | OMPScanDispatch = createBasicBlock(name: "omp.inscan.dispatch" ); |
1937 | EmitBranch(Block: OMPScanDispatch); |
1938 | EmitBlock(BB: OMPBeforeScanBlock); |
1939 | } |
1940 | |
1941 | // Emit loop variables for C++ range loops. |
1942 | const Stmt *Body = |
1943 | D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers(); |
1944 | // Emit loop body. |
1945 | emitBody(*this, Body, |
1946 | OMPLoopBasedDirective::tryToFindNextInnerLoop( |
1947 | CurStmt: Body, /*TryImperfectlyNestedLoops=*/true), |
1948 | D.getLoopsNumber()); |
1949 | |
1950 | // Jump to the dispatcher at the end of the loop body. |
1951 | if (IsInscanRegion) |
1952 | EmitBranch(Block: OMPScanExitBlock); |
1953 | |
1954 | // The end (updates/cleanups). |
1955 | EmitBlock(BB: Continue.getBlock()); |
1956 | BreakContinueStack.pop_back(); |
1957 | } |
1958 | |
1959 | using EmittedClosureTy = std::pair<llvm::Function *, llvm::Value *>; |
1960 | |
1961 | /// Emit a captured statement and return the function as well as its captured |
1962 | /// closure context. |
1963 | static EmittedClosureTy emitCapturedStmtFunc(CodeGenFunction &ParentCGF, |
1964 | const CapturedStmt *S) { |
1965 | LValue CapStruct = ParentCGF.InitCapturedStruct(S: *S); |
1966 | CodeGenFunction CGF(ParentCGF.CGM, /*suppressNewContext=*/true); |
1967 | std::unique_ptr<CodeGenFunction::CGCapturedStmtInfo> CSI = |
1968 | std::make_unique<CodeGenFunction::CGCapturedStmtInfo>(args: *S); |
1969 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, CSI.get()); |
1970 | llvm::Function *F = CGF.GenerateCapturedStmtFunction(S: *S); |
1971 | |
1972 | return {F, CapStruct.getPointer(CGF&: ParentCGF)}; |
1973 | } |
1974 | |
1975 | /// Emit a call to a previously captured closure. |
1976 | static llvm::CallInst * |
1977 | emitCapturedStmtCall(CodeGenFunction &ParentCGF, EmittedClosureTy Cap, |
1978 | llvm::ArrayRef<llvm::Value *> Args) { |
1979 | // Append the closure context to the argument. |
1980 | SmallVector<llvm::Value *> EffectiveArgs; |
1981 | EffectiveArgs.reserve(N: Args.size() + 1); |
1982 | llvm::append_range(C&: EffectiveArgs, R&: Args); |
1983 | EffectiveArgs.push_back(Elt: Cap.second); |
1984 | |
1985 | return ParentCGF.Builder.CreateCall(Callee: Cap.first, Args: EffectiveArgs); |
1986 | } |
1987 | |
1988 | llvm::CanonicalLoopInfo * |
1989 | CodeGenFunction::EmitOMPCollapsedCanonicalLoopNest(const Stmt *S, int Depth) { |
1990 | assert(Depth == 1 && "Nested loops with OpenMPIRBuilder not yet implemented" ); |
1991 | |
1992 | // The caller is processing the loop-associated directive processing the \p |
1993 | // Depth loops nested in \p S. Put the previous pending loop-associated |
1994 | // directive to the stack. If the current loop-associated directive is a loop |
1995 | // transformation directive, it will push its generated loops onto the stack |
1996 | // such that together with the loops left here they form the combined loop |
1997 | // nest for the parent loop-associated directive. |
1998 | int ParentExpectedOMPLoopDepth = ExpectedOMPLoopDepth; |
1999 | ExpectedOMPLoopDepth = Depth; |
2000 | |
2001 | EmitStmt(S); |
2002 | assert(OMPLoopNestStack.size() >= (size_t)Depth && "Found too few loops" ); |
2003 | |
2004 | // The last added loop is the outermost one. |
2005 | llvm::CanonicalLoopInfo *Result = OMPLoopNestStack.back(); |
2006 | |
2007 | // Pop the \p Depth loops requested by the call from that stack and restore |
2008 | // the previous context. |
2009 | OMPLoopNestStack.pop_back_n(NumItems: Depth); |
2010 | ExpectedOMPLoopDepth = ParentExpectedOMPLoopDepth; |
2011 | |
2012 | return Result; |
2013 | } |
2014 | |
2015 | void CodeGenFunction::EmitOMPCanonicalLoop(const OMPCanonicalLoop *S) { |
2016 | const Stmt *SyntacticalLoop = S->getLoopStmt(); |
2017 | if (!getLangOpts().OpenMPIRBuilder) { |
2018 | // Ignore if OpenMPIRBuilder is not enabled. |
2019 | EmitStmt(S: SyntacticalLoop); |
2020 | return; |
2021 | } |
2022 | |
2023 | LexicalScope ForScope(*this, S->getSourceRange()); |
2024 | |
2025 | // Emit init statements. The Distance/LoopVar funcs may reference variable |
2026 | // declarations they contain. |
2027 | const Stmt *BodyStmt; |
2028 | if (const auto *For = dyn_cast<ForStmt>(Val: SyntacticalLoop)) { |
2029 | if (const Stmt *InitStmt = For->getInit()) |
2030 | EmitStmt(S: InitStmt); |
2031 | BodyStmt = For->getBody(); |
2032 | } else if (const auto *RangeFor = |
2033 | dyn_cast<CXXForRangeStmt>(Val: SyntacticalLoop)) { |
2034 | if (const DeclStmt *RangeStmt = RangeFor->getRangeStmt()) |
2035 | EmitStmt(S: RangeStmt); |
2036 | if (const DeclStmt *BeginStmt = RangeFor->getBeginStmt()) |
2037 | EmitStmt(S: BeginStmt); |
2038 | if (const DeclStmt *EndStmt = RangeFor->getEndStmt()) |
2039 | EmitStmt(S: EndStmt); |
2040 | if (const DeclStmt *LoopVarStmt = RangeFor->getLoopVarStmt()) |
2041 | EmitStmt(S: LoopVarStmt); |
2042 | BodyStmt = RangeFor->getBody(); |
2043 | } else |
2044 | llvm_unreachable("Expected for-stmt or range-based for-stmt" ); |
2045 | |
2046 | // Emit closure for later use. By-value captures will be captured here. |
2047 | const CapturedStmt *DistanceFunc = S->getDistanceFunc(); |
2048 | EmittedClosureTy DistanceClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: DistanceFunc); |
2049 | const CapturedStmt *LoopVarFunc = S->getLoopVarFunc(); |
2050 | EmittedClosureTy LoopVarClosure = emitCapturedStmtFunc(ParentCGF&: *this, S: LoopVarFunc); |
2051 | |
2052 | // Call the distance function to get the number of iterations of the loop to |
2053 | // come. |
2054 | QualType LogicalTy = DistanceFunc->getCapturedDecl() |
2055 | ->getParam(i: 0) |
2056 | ->getType() |
2057 | .getNonReferenceType(); |
2058 | RawAddress CountAddr = CreateMemTemp(T: LogicalTy, Name: ".count.addr" ); |
2059 | emitCapturedStmtCall(ParentCGF&: *this, Cap: DistanceClosure, Args: {CountAddr.getPointer()}); |
2060 | llvm::Value *DistVal = Builder.CreateLoad(Addr: CountAddr, Name: ".count" ); |
2061 | |
2062 | // Emit the loop structure. |
2063 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2064 | auto BodyGen = [&, this](llvm::OpenMPIRBuilder::InsertPointTy CodeGenIP, |
2065 | llvm::Value *IndVar) { |
2066 | Builder.restoreIP(IP: CodeGenIP); |
2067 | |
2068 | // Emit the loop body: Convert the logical iteration number to the loop |
2069 | // variable and emit the body. |
2070 | const DeclRefExpr *LoopVarRef = S->getLoopVarRef(); |
2071 | LValue LCVal = EmitLValue(LoopVarRef); |
2072 | Address LoopVarAddress = LCVal.getAddress(CGF&: *this); |
2073 | emitCapturedStmtCall(ParentCGF&: *this, Cap: LoopVarClosure, |
2074 | Args: {LoopVarAddress.emitRawPointer(CGF&: *this), IndVar}); |
2075 | |
2076 | RunCleanupsScope BodyScope(*this); |
2077 | EmitStmt(S: BodyStmt); |
2078 | }; |
2079 | llvm::CanonicalLoopInfo *CL = |
2080 | OMPBuilder.createCanonicalLoop(Loc: Builder, BodyGenCB: BodyGen, TripCount: DistVal); |
2081 | |
2082 | // Finish up the loop. |
2083 | Builder.restoreIP(IP: CL->getAfterIP()); |
2084 | ForScope.ForceCleanup(); |
2085 | |
2086 | // Remember the CanonicalLoopInfo for parent AST nodes consuming it. |
2087 | OMPLoopNestStack.push_back(Elt: CL); |
2088 | } |
2089 | |
2090 | void CodeGenFunction::EmitOMPInnerLoop( |
2091 | const OMPExecutableDirective &S, bool RequiresCleanup, const Expr *LoopCond, |
2092 | const Expr *IncExpr, |
2093 | const llvm::function_ref<void(CodeGenFunction &)> BodyGen, |
2094 | const llvm::function_ref<void(CodeGenFunction &)> PostIncGen) { |
2095 | auto LoopExit = getJumpDestInCurrentScope(Name: "omp.inner.for.end" ); |
2096 | |
2097 | // Start the loop with a block that tests the condition. |
2098 | auto CondBlock = createBasicBlock(name: "omp.inner.for.cond" ); |
2099 | EmitBlock(BB: CondBlock); |
2100 | const SourceRange R = S.getSourceRange(); |
2101 | |
2102 | // If attributes are attached, push to the basic block with them. |
2103 | const auto &OMPED = cast<OMPExecutableDirective>(Val: S); |
2104 | const CapturedStmt *ICS = OMPED.getInnermostCapturedStmt(); |
2105 | const Stmt *SS = ICS->getCapturedStmt(); |
2106 | const AttributedStmt *AS = dyn_cast_or_null<AttributedStmt>(Val: SS); |
2107 | OMPLoopNestStack.clear(); |
2108 | if (AS) |
2109 | LoopStack.push(Header: CondBlock, Ctx&: CGM.getContext(), CGOpts: CGM.getCodeGenOpts(), |
2110 | Attrs: AS->getAttrs(), StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2111 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2112 | else |
2113 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2114 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2115 | |
2116 | // If there are any cleanups between here and the loop-exit scope, |
2117 | // create a block to stage a loop exit along. |
2118 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2119 | if (RequiresCleanup) |
2120 | ExitBlock = createBasicBlock(name: "omp.inner.for.cond.cleanup" ); |
2121 | |
2122 | llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.inner.for.body" ); |
2123 | |
2124 | // Emit condition. |
2125 | EmitBranchOnBoolExpr(Cond: LoopCond, TrueBlock: LoopBody, FalseBlock: ExitBlock, TrueCount: getProfileCount(&S)); |
2126 | if (ExitBlock != LoopExit.getBlock()) { |
2127 | EmitBlock(BB: ExitBlock); |
2128 | EmitBranchThroughCleanup(Dest: LoopExit); |
2129 | } |
2130 | |
2131 | EmitBlock(BB: LoopBody); |
2132 | incrementProfileCounter(&S); |
2133 | |
2134 | // Create a block for the increment. |
2135 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.inner.for.inc" ); |
2136 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
2137 | |
2138 | BodyGen(*this); |
2139 | |
2140 | // Emit "IV = IV + 1" and a back-edge to the condition block. |
2141 | EmitBlock(BB: Continue.getBlock()); |
2142 | EmitIgnoredExpr(E: IncExpr); |
2143 | PostIncGen(*this); |
2144 | BreakContinueStack.pop_back(); |
2145 | EmitBranch(Block: CondBlock); |
2146 | LoopStack.pop(); |
2147 | // Emit the fall-through block. |
2148 | EmitBlock(BB: LoopExit.getBlock()); |
2149 | } |
2150 | |
2151 | bool CodeGenFunction::EmitOMPLinearClauseInit(const OMPLoopDirective &D) { |
2152 | if (!HaveInsertPoint()) |
2153 | return false; |
2154 | // Emit inits for the linear variables. |
2155 | bool HasLinears = false; |
2156 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2157 | for (const Expr *Init : C->inits()) { |
2158 | HasLinears = true; |
2159 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(Init)->getDecl()); |
2160 | if (const auto *Ref = |
2161 | dyn_cast<DeclRefExpr>(VD->getInit()->IgnoreImpCasts())) { |
2162 | AutoVarEmission Emission = EmitAutoVarAlloca(*VD); |
2163 | const auto *OrigVD = cast<VarDecl>(Ref->getDecl()); |
2164 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2165 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
2166 | VD->getInit()->getType(), VK_LValue, |
2167 | VD->getInit()->getExprLoc()); |
2168 | EmitExprAsInit( |
2169 | &DRE, VD, |
2170 | MakeAddrLValue(Emission.getAllocatedAddress(), VD->getType()), |
2171 | /*capturedByInit=*/false); |
2172 | EmitAutoVarCleanups(Emission); |
2173 | } else { |
2174 | EmitVarDecl(*VD); |
2175 | } |
2176 | } |
2177 | // Emit the linear steps for the linear clauses. |
2178 | // If a step is not constant, it is pre-calculated before the loop. |
2179 | if (const auto *CS = cast_or_null<BinaryOperator>(C->getCalcStep())) |
2180 | if (const auto *SaveRef = cast<DeclRefExpr>(CS->getLHS())) { |
2181 | EmitVarDecl(*cast<VarDecl>(SaveRef->getDecl())); |
2182 | // Emit calculation of the linear step. |
2183 | EmitIgnoredExpr(CS); |
2184 | } |
2185 | } |
2186 | return HasLinears; |
2187 | } |
2188 | |
2189 | void CodeGenFunction::EmitOMPLinearClauseFinal( |
2190 | const OMPLoopDirective &D, |
2191 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2192 | if (!HaveInsertPoint()) |
2193 | return; |
2194 | llvm::BasicBlock *DoneBB = nullptr; |
2195 | // Emit the final values of the linear variables. |
2196 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2197 | auto IC = C->varlist_begin(); |
2198 | for (const Expr *F : C->finals()) { |
2199 | if (!DoneBB) { |
2200 | if (llvm::Value *Cond = CondGen(*this)) { |
2201 | // If the first post-update expression is found, emit conditional |
2202 | // block if it was requested. |
2203 | llvm::BasicBlock *ThenBB = createBasicBlock(".omp.linear.pu" ); |
2204 | DoneBB = createBasicBlock(".omp.linear.pu.done" ); |
2205 | Builder.CreateCondBr(Cond, ThenBB, DoneBB); |
2206 | EmitBlock(ThenBB); |
2207 | } |
2208 | } |
2209 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IC)->getDecl()); |
2210 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(OrigVD), |
2211 | CapturedStmtInfo->lookup(OrigVD) != nullptr, |
2212 | (*IC)->getType(), VK_LValue, (*IC)->getExprLoc()); |
2213 | Address OrigAddr = EmitLValue(&DRE).getAddress(*this); |
2214 | CodeGenFunction::OMPPrivateScope VarScope(*this); |
2215 | VarScope.addPrivate(OrigVD, OrigAddr); |
2216 | (void)VarScope.Privatize(); |
2217 | EmitIgnoredExpr(F); |
2218 | ++IC; |
2219 | } |
2220 | if (const Expr *PostUpdate = C->getPostUpdateExpr()) |
2221 | EmitIgnoredExpr(PostUpdate); |
2222 | } |
2223 | if (DoneBB) |
2224 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2225 | } |
2226 | |
2227 | static void emitAlignedClause(CodeGenFunction &CGF, |
2228 | const OMPExecutableDirective &D) { |
2229 | if (!CGF.HaveInsertPoint()) |
2230 | return; |
2231 | for (const auto *Clause : D.getClausesOfKind<OMPAlignedClause>()) { |
2232 | llvm::APInt ClauseAlignment(64, 0); |
2233 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2234 | auto *AlignmentCI = |
2235 | cast<llvm::ConstantInt>(Val: CGF.EmitScalarExpr(E: AlignmentExpr)); |
2236 | ClauseAlignment = AlignmentCI->getValue(); |
2237 | } |
2238 | for (const Expr *E : Clause->varlists()) { |
2239 | llvm::APInt Alignment(ClauseAlignment); |
2240 | if (Alignment == 0) { |
2241 | // OpenMP [2.8.1, Description] |
2242 | // If no optional parameter is specified, implementation-defined default |
2243 | // alignments for SIMD instructions on the target platforms are assumed. |
2244 | Alignment = |
2245 | CGF.getContext() |
2246 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2247 | E->getType()->getPointeeType())) |
2248 | .getQuantity(); |
2249 | } |
2250 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2251 | "alignment is not power of 2" ); |
2252 | if (Alignment != 0) { |
2253 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2254 | CGF.emitAlignmentAssumption( |
2255 | PtrValue, E, /*No second loc needed*/ SourceLocation(), |
2256 | llvm::ConstantInt::get(CGF.getLLVMContext(), Alignment)); |
2257 | } |
2258 | } |
2259 | } |
2260 | } |
2261 | |
2262 | void CodeGenFunction::EmitOMPPrivateLoopCounters( |
2263 | const OMPLoopDirective &S, CodeGenFunction::OMPPrivateScope &LoopScope) { |
2264 | if (!HaveInsertPoint()) |
2265 | return; |
2266 | auto I = S.private_counters().begin(); |
2267 | for (const Expr *E : S.counters()) { |
2268 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2269 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *I)->getDecl()); |
2270 | // Emit var without initialization. |
2271 | AutoVarEmission VarEmission = EmitAutoVarAlloca(var: *PrivateVD); |
2272 | EmitAutoVarCleanups(emission: VarEmission); |
2273 | LocalDeclMap.erase(PrivateVD); |
2274 | (void)LoopScope.addPrivate(LocalVD: VD, Addr: VarEmission.getAllocatedAddress()); |
2275 | if (LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD) || |
2276 | VD->hasGlobalStorage()) { |
2277 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(VD), |
2278 | LocalDeclMap.count(VD) || CapturedStmtInfo->lookup(VD), |
2279 | E->getType(), VK_LValue, E->getExprLoc()); |
2280 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: EmitLValue(&DRE).getAddress(CGF&: *this)); |
2281 | } else { |
2282 | (void)LoopScope.addPrivate(LocalVD: PrivateVD, Addr: VarEmission.getAllocatedAddress()); |
2283 | } |
2284 | ++I; |
2285 | } |
2286 | // Privatize extra loop counters used in loops for ordered(n) clauses. |
2287 | for (const auto *C : S.getClausesOfKind<OMPOrderedClause>()) { |
2288 | if (!C->getNumForLoops()) |
2289 | continue; |
2290 | for (unsigned I = S.getLoopsNumber(), E = C->getLoopNumIterations().size(); |
2291 | I < E; ++I) { |
2292 | const auto *DRE = cast<DeclRefExpr>(C->getLoopCounter(I)); |
2293 | const auto *VD = cast<VarDecl>(DRE->getDecl()); |
2294 | // Override only those variables that can be captured to avoid re-emission |
2295 | // of the variables declared within the loops. |
2296 | if (DRE->refersToEnclosingVariableOrCapture()) { |
2297 | (void)LoopScope.addPrivate( |
2298 | VD, CreateMemTemp(DRE->getType(), VD->getName())); |
2299 | } |
2300 | } |
2301 | } |
2302 | } |
2303 | |
2304 | static void emitPreCond(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2305 | const Expr *Cond, llvm::BasicBlock *TrueBlock, |
2306 | llvm::BasicBlock *FalseBlock, uint64_t TrueCount) { |
2307 | if (!CGF.HaveInsertPoint()) |
2308 | return; |
2309 | { |
2310 | CodeGenFunction::OMPPrivateScope PreCondScope(CGF); |
2311 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope&: PreCondScope); |
2312 | (void)PreCondScope.Privatize(); |
2313 | // Get initial values of real counters. |
2314 | for (const Expr *I : S.inits()) { |
2315 | CGF.EmitIgnoredExpr(E: I); |
2316 | } |
2317 | } |
2318 | // Create temp loop control variables with their init values to support |
2319 | // non-rectangular loops. |
2320 | CodeGenFunction::OMPMapVars PreCondVars; |
2321 | for (const Expr *E : S.dependent_counters()) { |
2322 | if (!E) |
2323 | continue; |
2324 | assert(!E->getType().getNonReferenceType()->isRecordType() && |
2325 | "dependent counter must not be an iterator." ); |
2326 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
2327 | Address CounterAddr = |
2328 | CGF.CreateMemTemp(VD->getType().getNonReferenceType()); |
2329 | (void)PreCondVars.setVarAddr(CGF, LocalVD: VD, TempAddr: CounterAddr); |
2330 | } |
2331 | (void)PreCondVars.apply(CGF); |
2332 | for (const Expr *E : S.dependent_inits()) { |
2333 | if (!E) |
2334 | continue; |
2335 | CGF.EmitIgnoredExpr(E); |
2336 | } |
2337 | // Check that loop is executed at least one time. |
2338 | CGF.EmitBranchOnBoolExpr(Cond, TrueBlock, FalseBlock, TrueCount); |
2339 | PreCondVars.restore(CGF); |
2340 | } |
2341 | |
2342 | void CodeGenFunction::EmitOMPLinearClause( |
2343 | const OMPLoopDirective &D, CodeGenFunction::OMPPrivateScope &PrivateScope) { |
2344 | if (!HaveInsertPoint()) |
2345 | return; |
2346 | llvm::DenseSet<const VarDecl *> SIMDLCVs; |
2347 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
2348 | const auto *LoopDirective = cast<OMPLoopDirective>(Val: &D); |
2349 | for (const Expr *C : LoopDirective->counters()) { |
2350 | SIMDLCVs.insert( |
2351 | V: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: C)->getDecl())->getCanonicalDecl()); |
2352 | } |
2353 | } |
2354 | for (const auto *C : D.getClausesOfKind<OMPLinearClause>()) { |
2355 | auto CurPrivate = C->privates().begin(); |
2356 | for (const Expr *E : C->varlists()) { |
2357 | const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl()); |
2358 | const auto *PrivateVD = |
2359 | cast<VarDecl>(cast<DeclRefExpr>(*CurPrivate)->getDecl()); |
2360 | if (!SIMDLCVs.count(VD->getCanonicalDecl())) { |
2361 | // Emit private VarDecl with copy init. |
2362 | EmitVarDecl(*PrivateVD); |
2363 | bool IsRegistered = |
2364 | PrivateScope.addPrivate(VD, GetAddrOfLocalVar(PrivateVD)); |
2365 | assert(IsRegistered && "linear var already registered as private" ); |
2366 | // Silence the warning about unused variable. |
2367 | (void)IsRegistered; |
2368 | } else { |
2369 | EmitVarDecl(*PrivateVD); |
2370 | } |
2371 | ++CurPrivate; |
2372 | } |
2373 | } |
2374 | } |
2375 | |
2376 | static void emitSimdlenSafelenClause(CodeGenFunction &CGF, |
2377 | const OMPExecutableDirective &D) { |
2378 | if (!CGF.HaveInsertPoint()) |
2379 | return; |
2380 | if (const auto *C = D.getSingleClause<OMPSimdlenClause>()) { |
2381 | RValue Len = CGF.EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2382 | /*ignoreResult=*/true); |
2383 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2384 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2385 | // In presence of finite 'safelen', it may be unsafe to mark all |
2386 | // the memory instructions parallel, because loop-carried |
2387 | // dependences of 'safelen' iterations are possible. |
2388 | CGF.LoopStack.setParallel(!D.getSingleClause<OMPSafelenClause>()); |
2389 | } else if (const auto *C = D.getSingleClause<OMPSafelenClause>()) { |
2390 | RValue Len = CGF.EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2391 | /*ignoreResult=*/true); |
2392 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2393 | CGF.LoopStack.setVectorizeWidth(Val->getZExtValue()); |
2394 | // In presence of finite 'safelen', it may be unsafe to mark all |
2395 | // the memory instructions parallel, because loop-carried |
2396 | // dependences of 'safelen' iterations are possible. |
2397 | CGF.LoopStack.setParallel(/*Enable=*/false); |
2398 | } |
2399 | } |
2400 | |
2401 | void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { |
2402 | // Walk clauses and process safelen/lastprivate. |
2403 | LoopStack.setParallel(/*Enable=*/true); |
2404 | LoopStack.setVectorizeEnable(); |
2405 | emitSimdlenSafelenClause(*this, D); |
2406 | if (const auto *C = D.getSingleClause<OMPOrderClause>()) |
2407 | if (C->getKind() == OMPC_ORDER_concurrent) |
2408 | LoopStack.setParallel(/*Enable=*/true); |
2409 | if ((D.getDirectiveKind() == OMPD_simd || |
2410 | (getLangOpts().OpenMPSimd && |
2411 | isOpenMPSimdDirective(D.getDirectiveKind()))) && |
2412 | llvm::any_of(D.getClausesOfKind<OMPReductionClause>(), |
2413 | [](const OMPReductionClause *C) { |
2414 | return C->getModifier() == OMPC_REDUCTION_inscan; |
2415 | })) |
2416 | // Disable parallel access in case of prefix sum. |
2417 | LoopStack.setParallel(/*Enable=*/false); |
2418 | } |
2419 | |
2420 | void CodeGenFunction::EmitOMPSimdFinal( |
2421 | const OMPLoopDirective &D, |
2422 | const llvm::function_ref<llvm::Value *(CodeGenFunction &)> CondGen) { |
2423 | if (!HaveInsertPoint()) |
2424 | return; |
2425 | llvm::BasicBlock *DoneBB = nullptr; |
2426 | auto IC = D.counters().begin(); |
2427 | auto IPC = D.private_counters().begin(); |
2428 | for (const Expr *F : D.finals()) { |
2429 | const auto *OrigVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IC))->getDecl()); |
2430 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: (*IPC))->getDecl()); |
2431 | const auto *CED = dyn_cast<OMPCapturedExprDecl>(Val: OrigVD); |
2432 | if (LocalDeclMap.count(OrigVD) || CapturedStmtInfo->lookup(VD: OrigVD) || |
2433 | OrigVD->hasGlobalStorage() || CED) { |
2434 | if (!DoneBB) { |
2435 | if (llvm::Value *Cond = CondGen(*this)) { |
2436 | // If the first post-update expression is found, emit conditional |
2437 | // block if it was requested. |
2438 | llvm::BasicBlock *ThenBB = createBasicBlock(name: ".omp.final.then" ); |
2439 | DoneBB = createBasicBlock(name: ".omp.final.done" ); |
2440 | Builder.CreateCondBr(Cond, True: ThenBB, False: DoneBB); |
2441 | EmitBlock(BB: ThenBB); |
2442 | } |
2443 | } |
2444 | Address OrigAddr = Address::invalid(); |
2445 | if (CED) { |
2446 | OrigAddr = |
2447 | EmitLValue(E: CED->getInit()->IgnoreImpCasts()).getAddress(*this); |
2448 | } else { |
2449 | DeclRefExpr DRE(getContext(), const_cast<VarDecl *>(PrivateVD), |
2450 | /*RefersToEnclosingVariableOrCapture=*/false, |
2451 | (*IPC)->getType(), VK_LValue, (*IPC)->getExprLoc()); |
2452 | OrigAddr = EmitLValue(&DRE).getAddress(CGF&: *this); |
2453 | } |
2454 | OMPPrivateScope VarScope(*this); |
2455 | VarScope.addPrivate(LocalVD: OrigVD, Addr: OrigAddr); |
2456 | (void)VarScope.Privatize(); |
2457 | EmitIgnoredExpr(E: F); |
2458 | } |
2459 | ++IC; |
2460 | ++IPC; |
2461 | } |
2462 | if (DoneBB) |
2463 | EmitBlock(BB: DoneBB, /*IsFinished=*/true); |
2464 | } |
2465 | |
2466 | static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF, |
2467 | const OMPLoopDirective &S, |
2468 | CodeGenFunction::JumpDest LoopExit) { |
2469 | CGF.EmitOMPLoopBody(D: S, LoopExit); |
2470 | CGF.EmitStopPoint(&S); |
2471 | } |
2472 | |
2473 | /// Emit a helper variable and return corresponding lvalue. |
2474 | static LValue EmitOMPHelperVar(CodeGenFunction &CGF, |
2475 | const DeclRefExpr *Helper) { |
2476 | auto VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
2477 | CGF.EmitVarDecl(D: *VDecl); |
2478 | return CGF.EmitLValue(Helper); |
2479 | } |
2480 | |
2481 | static void emitCommonSimdLoop(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2482 | const RegionCodeGenTy &SimdInitGen, |
2483 | const RegionCodeGenTy &BodyCodeGen) { |
2484 | auto &&ThenGen = [&S, &SimdInitGen, &BodyCodeGen](CodeGenFunction &CGF, |
2485 | PrePostActionTy &) { |
2486 | CGOpenMPRuntime::NontemporalDeclsRAII NontemporalsRegion(CGF.CGM, S); |
2487 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2488 | SimdInitGen(CGF); |
2489 | |
2490 | BodyCodeGen(CGF); |
2491 | }; |
2492 | auto &&ElseGen = [&BodyCodeGen](CodeGenFunction &CGF, PrePostActionTy &) { |
2493 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
2494 | CGF.LoopStack.setVectorizeEnable(/*Enable=*/false); |
2495 | |
2496 | BodyCodeGen(CGF); |
2497 | }; |
2498 | const Expr *IfCond = nullptr; |
2499 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
2500 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
2501 | if (CGF.getLangOpts().OpenMP >= 50 && |
2502 | (C->getNameModifier() == OMPD_unknown || |
2503 | C->getNameModifier() == OMPD_simd)) { |
2504 | IfCond = C->getCondition(); |
2505 | break; |
2506 | } |
2507 | } |
2508 | } |
2509 | if (IfCond) { |
2510 | CGF.CGM.getOpenMPRuntime().emitIfClause(CGF, Cond: IfCond, ThenGen, ElseGen); |
2511 | } else { |
2512 | RegionCodeGenTy ThenRCG(ThenGen); |
2513 | ThenRCG(CGF); |
2514 | } |
2515 | } |
2516 | |
2517 | static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S, |
2518 | PrePostActionTy &Action) { |
2519 | Action.Enter(CGF); |
2520 | assert(isOpenMPSimdDirective(S.getDirectiveKind()) && |
2521 | "Expected simd directive" ); |
2522 | OMPLoopScope PreInitScope(CGF, S); |
2523 | // if (PreCond) { |
2524 | // for (IV in 0..LastIteration) BODY; |
2525 | // <Final counter/linear vars updates>; |
2526 | // } |
2527 | // |
2528 | if (isOpenMPDistributeDirective(S.getDirectiveKind()) || |
2529 | isOpenMPWorksharingDirective(S.getDirectiveKind()) || |
2530 | isOpenMPTaskLoopDirective(S.getDirectiveKind())) { |
2531 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable())); |
2532 | (void)EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable())); |
2533 | } |
2534 | |
2535 | // Emit: if (PreCond) - begin. |
2536 | // If the condition constant folds and can be elided, avoid emitting the |
2537 | // whole loop. |
2538 | bool CondConstant; |
2539 | llvm::BasicBlock *ContBlock = nullptr; |
2540 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
2541 | if (!CondConstant) |
2542 | return; |
2543 | } else { |
2544 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "simd.if.then" ); |
2545 | ContBlock = CGF.createBasicBlock(name: "simd.if.end" ); |
2546 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
2547 | TrueCount: CGF.getProfileCount(&S)); |
2548 | CGF.EmitBlock(BB: ThenBlock); |
2549 | CGF.incrementProfileCounter(&S); |
2550 | } |
2551 | |
2552 | // Emit the loop iteration variable. |
2553 | const Expr *IVExpr = S.getIterationVariable(); |
2554 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
2555 | CGF.EmitVarDecl(D: *IVDecl); |
2556 | CGF.EmitIgnoredExpr(E: S.getInit()); |
2557 | |
2558 | // Emit the iterations count variable. |
2559 | // If it is not a variable, Sema decided to calculate iterations count on |
2560 | // each iteration (e.g., it is foldable into a constant). |
2561 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
2562 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
2563 | // Emit calculation of the iterations count. |
2564 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
2565 | } |
2566 | |
2567 | emitAlignedClause(CGF, S); |
2568 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
2569 | { |
2570 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
2571 | CGF.EmitOMPPrivateClause(S, LoopScope); |
2572 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
2573 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
2574 | CGF.EmitOMPReductionClauseInit(S, LoopScope); |
2575 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
2576 | CGF, S, CGF.EmitLValue(E: S.getIterationVariable())); |
2577 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
2578 | (void)LoopScope.Privatize(); |
2579 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
2580 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
2581 | |
2582 | emitCommonSimdLoop( |
2583 | CGF, S, |
2584 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
2585 | CGF.EmitOMPSimdInit(D: S); |
2586 | }, |
2587 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2588 | CGF.EmitOMPInnerLoop( |
2589 | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
2590 | [&S](CodeGenFunction &CGF) { |
2591 | emitOMPLoopBodyWithStopPoint(CGF, S, |
2592 | LoopExit: CodeGenFunction::JumpDest()); |
2593 | }, |
2594 | [](CodeGenFunction &) {}); |
2595 | }); |
2596 | CGF.EmitOMPSimdFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2597 | // Emit final copy of the lastprivate variables at the end of loops. |
2598 | if (HasLastprivateClause) |
2599 | CGF.EmitOMPLastprivateClauseFinal(S, /*NoFinals=*/true); |
2600 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_simd); |
2601 | emitPostUpdateForReductionClause(CGF, S, |
2602 | [](CodeGenFunction &) { return nullptr; }); |
2603 | LoopScope.restoreMap(); |
2604 | CGF.EmitOMPLinearClauseFinal(D: S, CondGen: [](CodeGenFunction &) { return nullptr; }); |
2605 | } |
2606 | // Emit: if (PreCond) - end. |
2607 | if (ContBlock) { |
2608 | CGF.EmitBranch(Block: ContBlock); |
2609 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
2610 | } |
2611 | } |
2612 | |
2613 | static bool isSupportedByOpenMPIRBuilder(const OMPSimdDirective &S) { |
2614 | // Check for unsupported clauses |
2615 | for (OMPClause *C : S.clauses()) { |
2616 | // Currently only order, simdlen and safelen clauses are supported |
2617 | if (!(isa<OMPSimdlenClause>(C) || isa<OMPSafelenClause>(C) || |
2618 | isa<OMPOrderClause>(C) || isa<OMPAlignedClause>(C))) |
2619 | return false; |
2620 | } |
2621 | |
2622 | // Check if we have a statement with the ordered directive. |
2623 | // Visit the statement hierarchy to find a compound statement |
2624 | // with a ordered directive in it. |
2625 | if (const auto *CanonLoop = dyn_cast<OMPCanonicalLoop>(S.getRawStmt())) { |
2626 | if (const Stmt *SyntacticalLoop = CanonLoop->getLoopStmt()) { |
2627 | for (const Stmt *SubStmt : SyntacticalLoop->children()) { |
2628 | if (!SubStmt) |
2629 | continue; |
2630 | if (const CompoundStmt *CS = dyn_cast<CompoundStmt>(SubStmt)) { |
2631 | for (const Stmt *CSSubStmt : CS->children()) { |
2632 | if (!CSSubStmt) |
2633 | continue; |
2634 | if (isa<OMPOrderedDirective>(CSSubStmt)) { |
2635 | return false; |
2636 | } |
2637 | } |
2638 | } |
2639 | } |
2640 | } |
2641 | } |
2642 | return true; |
2643 | } |
2644 | static llvm::MapVector<llvm::Value *, llvm::Value *> |
2645 | GetAlignedMapping(const OMPSimdDirective &S, CodeGenFunction &CGF) { |
2646 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars; |
2647 | for (const auto *Clause : S.getClausesOfKind<OMPAlignedClause>()) { |
2648 | llvm::APInt ClauseAlignment(64, 0); |
2649 | if (const Expr *AlignmentExpr = Clause->getAlignment()) { |
2650 | auto *AlignmentCI = |
2651 | cast<llvm::ConstantInt>(CGF.EmitScalarExpr(AlignmentExpr)); |
2652 | ClauseAlignment = AlignmentCI->getValue(); |
2653 | } |
2654 | for (const Expr *E : Clause->varlists()) { |
2655 | llvm::APInt Alignment(ClauseAlignment); |
2656 | if (Alignment == 0) { |
2657 | // OpenMP [2.8.1, Description] |
2658 | // If no optional parameter is specified, implementation-defined default |
2659 | // alignments for SIMD instructions on the target platforms are assumed. |
2660 | Alignment = |
2661 | CGF.getContext() |
2662 | .toCharUnitsFromBits(CGF.getContext().getOpenMPDefaultSimdAlign( |
2663 | E->getType()->getPointeeType())) |
2664 | .getQuantity(); |
2665 | } |
2666 | assert((Alignment == 0 || Alignment.isPowerOf2()) && |
2667 | "alignment is not power of 2" ); |
2668 | llvm::Value *PtrValue = CGF.EmitScalarExpr(E); |
2669 | AlignedVars[PtrValue] = CGF.Builder.getInt64(Alignment.getSExtValue()); |
2670 | } |
2671 | } |
2672 | return AlignedVars; |
2673 | } |
2674 | |
2675 | void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) { |
2676 | bool UseOMPIRBuilder = |
2677 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
2678 | if (UseOMPIRBuilder) { |
2679 | auto &&CodeGenIRBuilder = [this, &S, UseOMPIRBuilder](CodeGenFunction &CGF, |
2680 | PrePostActionTy &) { |
2681 | // Use the OpenMPIRBuilder if enabled. |
2682 | if (UseOMPIRBuilder) { |
2683 | llvm::MapVector<llvm::Value *, llvm::Value *> AlignedVars = |
2684 | GetAlignedMapping(S, CGF); |
2685 | // Emit the associated statement and get its loop representation. |
2686 | const Stmt *Inner = S.getRawStmt(); |
2687 | llvm::CanonicalLoopInfo *CLI = |
2688 | EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2689 | |
2690 | llvm::OpenMPIRBuilder &OMPBuilder = |
2691 | CGM.getOpenMPRuntime().getOMPBuilder(); |
2692 | // Add SIMD specific metadata |
2693 | llvm::ConstantInt *Simdlen = nullptr; |
2694 | if (const auto *C = S.getSingleClause<OMPSimdlenClause>()) { |
2695 | RValue Len = |
2696 | this->EmitAnyExpr(E: C->getSimdlen(), aggSlot: AggValueSlot::ignored(), |
2697 | /*ignoreResult=*/true); |
2698 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2699 | Simdlen = Val; |
2700 | } |
2701 | llvm::ConstantInt *Safelen = nullptr; |
2702 | if (const auto *C = S.getSingleClause<OMPSafelenClause>()) { |
2703 | RValue Len = |
2704 | this->EmitAnyExpr(E: C->getSafelen(), aggSlot: AggValueSlot::ignored(), |
2705 | /*ignoreResult=*/true); |
2706 | auto *Val = cast<llvm::ConstantInt>(Val: Len.getScalarVal()); |
2707 | Safelen = Val; |
2708 | } |
2709 | llvm::omp::OrderKind Order = llvm::omp::OrderKind::OMP_ORDER_unknown; |
2710 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
2711 | if (C->getKind() == OpenMPOrderClauseKind ::OMPC_ORDER_concurrent) { |
2712 | Order = llvm::omp::OrderKind::OMP_ORDER_concurrent; |
2713 | } |
2714 | } |
2715 | // Add simd metadata to the collapsed loop. Do not generate |
2716 | // another loop for if clause. Support for if clause is done earlier. |
2717 | OMPBuilder.applySimd(CLI, AlignedVars, |
2718 | /*IfCond*/ nullptr, Order, Simdlen, Safelen); |
2719 | return; |
2720 | } |
2721 | }; |
2722 | { |
2723 | auto LPCRegion = |
2724 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2725 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2726 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, |
2727 | CodeGenIRBuilder); |
2728 | } |
2729 | return; |
2730 | } |
2731 | |
2732 | ParentLoopDirectiveForScanRegion ScanRegion(*this, S); |
2733 | OMPFirstScanLoop = true; |
2734 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
2735 | emitOMPSimdRegion(CGF, S, Action); |
2736 | }; |
2737 | { |
2738 | auto LPCRegion = |
2739 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
2740 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
2741 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
2742 | } |
2743 | // Check for outer lastprivate conditional update. |
2744 | checkForLastprivateConditionalUpdate(*this, S); |
2745 | } |
2746 | |
2747 | void CodeGenFunction::EmitOMPTileDirective(const OMPTileDirective &S) { |
2748 | // Emit the de-sugared statement. |
2749 | OMPTransformDirectiveScopeRAII TileScope(*this, &S); |
2750 | EmitStmt(S: S.getTransformedStmt()); |
2751 | } |
2752 | |
2753 | void CodeGenFunction::EmitOMPUnrollDirective(const OMPUnrollDirective &S) { |
2754 | bool UseOMPIRBuilder = CGM.getLangOpts().OpenMPIRBuilder; |
2755 | |
2756 | if (UseOMPIRBuilder) { |
2757 | auto DL = SourceLocToDebugLoc(Location: S.getBeginLoc()); |
2758 | const Stmt *Inner = S.getRawStmt(); |
2759 | |
2760 | // Consume nested loop. Clear the entire remaining loop stack because a |
2761 | // fully unrolled loop is non-transformable. For partial unrolling the |
2762 | // generated outer loop is pushed back to the stack. |
2763 | llvm::CanonicalLoopInfo *CLI = EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
2764 | OMPLoopNestStack.clear(); |
2765 | |
2766 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
2767 | |
2768 | bool NeedsUnrolledCLI = ExpectedOMPLoopDepth >= 1; |
2769 | llvm::CanonicalLoopInfo *UnrolledCLI = nullptr; |
2770 | |
2771 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2772 | assert(ExpectedOMPLoopDepth == 0); |
2773 | OMPBuilder.unrollLoopFull(DL: DL, Loop: CLI); |
2774 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2775 | uint64_t Factor = 0; |
2776 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2777 | Factor = FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
2778 | assert(Factor >= 1 && "Only positive factors are valid" ); |
2779 | } |
2780 | OMPBuilder.unrollLoopPartial(DL: DL, Loop: CLI, Factor, |
2781 | UnrolledCLI: NeedsUnrolledCLI ? &UnrolledCLI : nullptr); |
2782 | } else { |
2783 | OMPBuilder.unrollLoopHeuristic(DL: DL, Loop: CLI); |
2784 | } |
2785 | |
2786 | assert((!NeedsUnrolledCLI || UnrolledCLI) && |
2787 | "NeedsUnrolledCLI implies UnrolledCLI to be set" ); |
2788 | if (UnrolledCLI) |
2789 | OMPLoopNestStack.push_back(Elt: UnrolledCLI); |
2790 | |
2791 | return; |
2792 | } |
2793 | |
2794 | // This function is only called if the unrolled loop is not consumed by any |
2795 | // other loop-associated construct. Such a loop-associated construct will have |
2796 | // used the transformed AST. |
2797 | |
2798 | // Set the unroll metadata for the next emitted loop. |
2799 | LoopStack.setUnrollState(LoopAttributes::Enable); |
2800 | |
2801 | if (S.hasClausesOfKind<OMPFullClause>()) { |
2802 | LoopStack.setUnrollState(LoopAttributes::Full); |
2803 | } else if (auto *PartialClause = S.getSingleClause<OMPPartialClause>()) { |
2804 | if (Expr *FactorExpr = PartialClause->getFactor()) { |
2805 | uint64_t Factor = |
2806 | FactorExpr->EvaluateKnownConstInt(Ctx: getContext()).getZExtValue(); |
2807 | assert(Factor >= 1 && "Only positive factors are valid" ); |
2808 | LoopStack.setUnrollCount(Factor); |
2809 | } |
2810 | } |
2811 | |
2812 | EmitStmt(S: S.getAssociatedStmt()); |
2813 | } |
2814 | |
2815 | void CodeGenFunction::EmitOMPOuterLoop( |
2816 | bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S, |
2817 | CodeGenFunction::OMPPrivateScope &LoopScope, |
2818 | const CodeGenFunction::OMPLoopArguments &LoopArgs, |
2819 | const CodeGenFunction::CodeGenLoopTy &CodeGenLoop, |
2820 | const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) { |
2821 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2822 | |
2823 | const Expr *IVExpr = S.getIterationVariable(); |
2824 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
2825 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2826 | |
2827 | JumpDest LoopExit = getJumpDestInCurrentScope(Name: "omp.dispatch.end" ); |
2828 | |
2829 | // Start the loop with a block that tests the condition. |
2830 | llvm::BasicBlock *CondBlock = createBasicBlock(name: "omp.dispatch.cond" ); |
2831 | EmitBlock(BB: CondBlock); |
2832 | const SourceRange R = S.getSourceRange(); |
2833 | OMPLoopNestStack.clear(); |
2834 | LoopStack.push(Header: CondBlock, StartLoc: SourceLocToDebugLoc(Location: R.getBegin()), |
2835 | EndLoc: SourceLocToDebugLoc(Location: R.getEnd())); |
2836 | |
2837 | llvm::Value *BoolCondVal = nullptr; |
2838 | if (!DynamicOrOrdered) { |
2839 | // UB = min(UB, GlobalUB) or |
2840 | // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g. |
2841 | // 'distribute parallel for') |
2842 | EmitIgnoredExpr(E: LoopArgs.EUB); |
2843 | // IV = LB |
2844 | EmitIgnoredExpr(E: LoopArgs.Init); |
2845 | // IV < UB |
2846 | BoolCondVal = EvaluateExprAsBool(E: LoopArgs.Cond); |
2847 | } else { |
2848 | BoolCondVal = |
2849 | RT.emitForNext(CGF&: *this, Loc: S.getBeginLoc(), IVSize, IVSigned, IL: LoopArgs.IL, |
2850 | LB: LoopArgs.LB, UB: LoopArgs.UB, ST: LoopArgs.ST); |
2851 | } |
2852 | |
2853 | // If there are any cleanups between here and the loop-exit scope, |
2854 | // create a block to stage a loop exit along. |
2855 | llvm::BasicBlock *ExitBlock = LoopExit.getBlock(); |
2856 | if (LoopScope.requiresCleanups()) |
2857 | ExitBlock = createBasicBlock(name: "omp.dispatch.cleanup" ); |
2858 | |
2859 | llvm::BasicBlock *LoopBody = createBasicBlock(name: "omp.dispatch.body" ); |
2860 | Builder.CreateCondBr(Cond: BoolCondVal, True: LoopBody, False: ExitBlock); |
2861 | if (ExitBlock != LoopExit.getBlock()) { |
2862 | EmitBlock(BB: ExitBlock); |
2863 | EmitBranchThroughCleanup(Dest: LoopExit); |
2864 | } |
2865 | EmitBlock(BB: LoopBody); |
2866 | |
2867 | // Emit "IV = LB" (in case of static schedule, we have already calculated new |
2868 | // LB for loop condition and emitted it above). |
2869 | if (DynamicOrOrdered) |
2870 | EmitIgnoredExpr(E: LoopArgs.Init); |
2871 | |
2872 | // Create a block for the increment. |
2873 | JumpDest Continue = getJumpDestInCurrentScope(Name: "omp.dispatch.inc" ); |
2874 | BreakContinueStack.push_back(Elt: BreakContinue(LoopExit, Continue)); |
2875 | |
2876 | emitCommonSimdLoop( |
2877 | CGF&: *this, S, |
2878 | SimdInitGen: [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { |
2879 | // Generate !llvm.loop.parallel metadata for loads and stores for loops |
2880 | // with dynamic/guided scheduling and without ordered clause. |
2881 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
2882 | CGF.LoopStack.setParallel(!IsMonotonic); |
2883 | if (const auto *C = S.getSingleClause<OMPOrderClause>()) |
2884 | if (C->getKind() == OMPC_ORDER_concurrent) |
2885 | CGF.LoopStack.setParallel(/*Enable=*/true); |
2886 | } else { |
2887 | CGF.EmitOMPSimdInit(D: S); |
2888 | } |
2889 | }, |
2890 | BodyCodeGen: [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, |
2891 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
2892 | SourceLocation Loc = S.getBeginLoc(); |
2893 | // when 'distribute' is not combined with a 'for': |
2894 | // while (idx <= UB) { BODY; ++idx; } |
2895 | // when 'distribute' is combined with a 'for' |
2896 | // (e.g. 'distribute parallel for') |
2897 | // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; } |
2898 | CGF.EmitOMPInnerLoop( |
2899 | S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr, |
2900 | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
2901 | CodeGenLoop(CGF, S, LoopExit); |
2902 | }, |
2903 | [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) { |
2904 | CodeGenOrdered(CGF, Loc, IVSize, IVSigned); |
2905 | }); |
2906 | }); |
2907 | |
2908 | EmitBlock(BB: Continue.getBlock()); |
2909 | BreakContinueStack.pop_back(); |
2910 | if (!DynamicOrOrdered) { |
2911 | // Emit "LB = LB + Stride", "UB = UB + Stride". |
2912 | EmitIgnoredExpr(E: LoopArgs.NextLB); |
2913 | EmitIgnoredExpr(E: LoopArgs.NextUB); |
2914 | } |
2915 | |
2916 | EmitBranch(Block: CondBlock); |
2917 | OMPLoopNestStack.clear(); |
2918 | LoopStack.pop(); |
2919 | // Emit the fall-through block. |
2920 | EmitBlock(BB: LoopExit.getBlock()); |
2921 | |
2922 | // Tell the runtime we are done. |
2923 | auto &&CodeGen = [DynamicOrOrdered, &S, &LoopArgs](CodeGenFunction &CGF) { |
2924 | if (!DynamicOrOrdered) |
2925 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
2926 | LoopArgs.DKind); |
2927 | }; |
2928 | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
2929 | } |
2930 | |
2931 | void CodeGenFunction::EmitOMPForOuterLoop( |
2932 | const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic, |
2933 | const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered, |
2934 | const OMPLoopArguments &LoopArgs, |
2935 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
2936 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
2937 | |
2938 | // Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime). |
2939 | const bool DynamicOrOrdered = Ordered || RT.isDynamic(ScheduleKind: ScheduleKind.Schedule); |
2940 | |
2941 | assert((Ordered || !RT.isStaticNonchunked(ScheduleKind.Schedule, |
2942 | LoopArgs.Chunk != nullptr)) && |
2943 | "static non-chunked schedule does not need outer loop" ); |
2944 | |
2945 | // Emit outer loop. |
2946 | // |
2947 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2948 | // When schedule(dynamic,chunk_size) is specified, the iterations are |
2949 | // distributed to threads in the team in chunks as the threads request them. |
2950 | // Each thread executes a chunk of iterations, then requests another chunk, |
2951 | // until no chunks remain to be distributed. Each chunk contains chunk_size |
2952 | // iterations, except for the last chunk to be distributed, which may have |
2953 | // fewer iterations. When no chunk_size is specified, it defaults to 1. |
2954 | // |
2955 | // When schedule(guided,chunk_size) is specified, the iterations are assigned |
2956 | // to threads in the team in chunks as the executing threads request them. |
2957 | // Each thread executes a chunk of iterations, then requests another chunk, |
2958 | // until no chunks remain to be assigned. For a chunk_size of 1, the size of |
2959 | // each chunk is proportional to the number of unassigned iterations divided |
2960 | // by the number of threads in the team, decreasing to 1. For a chunk_size |
2961 | // with value k (greater than 1), the size of each chunk is determined in the |
2962 | // same way, with the restriction that the chunks do not contain fewer than k |
2963 | // iterations (except for the last chunk to be assigned, which may have fewer |
2964 | // than k iterations). |
2965 | // |
2966 | // When schedule(auto) is specified, the decision regarding scheduling is |
2967 | // delegated to the compiler and/or runtime system. The programmer gives the |
2968 | // implementation the freedom to choose any possible mapping of iterations to |
2969 | // threads in the team. |
2970 | // |
2971 | // When schedule(runtime) is specified, the decision regarding scheduling is |
2972 | // deferred until run time, and the schedule and chunk size are taken from the |
2973 | // run-sched-var ICV. If the ICV is set to auto, the schedule is |
2974 | // implementation defined |
2975 | // |
2976 | // while(__kmpc_dispatch_next(&LB, &UB)) { |
2977 | // idx = LB; |
2978 | // while (idx <= UB) { BODY; ++idx; |
2979 | // __kmpc_dispatch_fini_(4|8)[u](); // For ordered loops only. |
2980 | // } // inner loop |
2981 | // } |
2982 | // |
2983 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
2984 | // When schedule(static, chunk_size) is specified, iterations are divided into |
2985 | // chunks of size chunk_size, and the chunks are assigned to the threads in |
2986 | // the team in a round-robin fashion in the order of the thread number. |
2987 | // |
2988 | // while(UB = min(UB, GlobalUB), idx = LB, idx < UB) { |
2989 | // while (idx <= UB) { BODY; ++idx; } // inner loop |
2990 | // LB = LB + ST; |
2991 | // UB = UB + ST; |
2992 | // } |
2993 | // |
2994 | |
2995 | const Expr *IVExpr = S.getIterationVariable(); |
2996 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
2997 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
2998 | |
2999 | if (DynamicOrOrdered) { |
3000 | const std::pair<llvm::Value *, llvm::Value *> DispatchBounds = |
3001 | CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB); |
3002 | llvm::Value *LBVal = DispatchBounds.first; |
3003 | llvm::Value *UBVal = DispatchBounds.second; |
3004 | CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal, |
3005 | LoopArgs.Chunk}; |
3006 | RT.emitForDispatchInit(CGF&: *this, Loc: S.getBeginLoc(), ScheduleKind, IVSize, |
3007 | IVSigned, Ordered, DispatchValues: DipatchRTInputValues); |
3008 | } else { |
3009 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3010 | IVSize, IVSigned, Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB, |
3011 | LoopArgs.ST, LoopArgs.Chunk); |
3012 | RT.emitForStaticInit(*this, S.getBeginLoc(), S.getDirectiveKind(), |
3013 | ScheduleKind, StaticInit); |
3014 | } |
3015 | |
3016 | auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc, |
3017 | const unsigned IVSize, |
3018 | const bool IVSigned) { |
3019 | if (Ordered) { |
3020 | CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize, |
3021 | IVSigned); |
3022 | } |
3023 | }; |
3024 | |
3025 | OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST, |
3026 | LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB); |
3027 | OuterLoopArgs.IncExpr = S.getInc(); |
3028 | OuterLoopArgs.Init = S.getInit(); |
3029 | OuterLoopArgs.Cond = S.getCond(); |
3030 | OuterLoopArgs.NextLB = S.getNextLowerBound(); |
3031 | OuterLoopArgs.NextUB = S.getNextUpperBound(); |
3032 | OuterLoopArgs.DKind = LoopArgs.DKind; |
3033 | EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, LoopArgs: OuterLoopArgs, |
3034 | CodeGenLoop: emitOMPLoopBodyWithStopPoint, CodeGenOrdered); |
3035 | } |
3036 | |
3037 | static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc, |
3038 | const unsigned IVSize, const bool IVSigned) {} |
3039 | |
3040 | void CodeGenFunction::EmitOMPDistributeOuterLoop( |
3041 | OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S, |
3042 | OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs, |
3043 | const CodeGenLoopTy &CodeGenLoopContent) { |
3044 | |
3045 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3046 | |
3047 | // Emit outer loop. |
3048 | // Same behavior as a OMPForOuterLoop, except that schedule cannot be |
3049 | // dynamic |
3050 | // |
3051 | |
3052 | const Expr *IVExpr = S.getIterationVariable(); |
3053 | const unsigned IVSize = getContext().getTypeSize(T: IVExpr->getType()); |
3054 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3055 | |
3056 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3057 | IVSize, IVSigned, /* Ordered = */ false, LoopArgs.IL, LoopArgs.LB, |
3058 | LoopArgs.UB, LoopArgs.ST, LoopArgs.Chunk); |
3059 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, Values: StaticInit); |
3060 | |
3061 | // for combined 'distribute' and 'for' the increment expression of distribute |
3062 | // is stored in DistInc. For 'distribute' alone, it is in Inc. |
3063 | Expr *IncExpr; |
3064 | if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())) |
3065 | IncExpr = S.getDistInc(); |
3066 | else |
3067 | IncExpr = S.getInc(); |
3068 | |
3069 | // this routine is shared by 'omp distribute parallel for' and |
3070 | // 'omp distribute': select the right EUB expression depending on the |
3071 | // directive |
3072 | OMPLoopArguments OuterLoopArgs; |
3073 | OuterLoopArgs.LB = LoopArgs.LB; |
3074 | OuterLoopArgs.UB = LoopArgs.UB; |
3075 | OuterLoopArgs.ST = LoopArgs.ST; |
3076 | OuterLoopArgs.IL = LoopArgs.IL; |
3077 | OuterLoopArgs.Chunk = LoopArgs.Chunk; |
3078 | OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3079 | ? S.getCombinedEnsureUpperBound() |
3080 | : S.getEnsureUpperBound(); |
3081 | OuterLoopArgs.IncExpr = IncExpr; |
3082 | OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3083 | ? S.getCombinedInit() |
3084 | : S.getInit(); |
3085 | OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3086 | ? S.getCombinedCond() |
3087 | : S.getCond(); |
3088 | OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3089 | ? S.getCombinedNextLowerBound() |
3090 | : S.getNextLowerBound(); |
3091 | OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
3092 | ? S.getCombinedNextUpperBound() |
3093 | : S.getNextUpperBound(); |
3094 | OuterLoopArgs.DKind = OMPD_distribute; |
3095 | |
3096 | EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S, |
3097 | LoopScope, LoopArgs: OuterLoopArgs, CodeGenLoop: CodeGenLoopContent, |
3098 | CodeGenOrdered: emitEmptyOrdered); |
3099 | } |
3100 | |
3101 | static std::pair<LValue, LValue> |
3102 | emitDistributeParallelForInnerBounds(CodeGenFunction &CGF, |
3103 | const OMPExecutableDirective &S) { |
3104 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3105 | LValue LB = |
3106 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3107 | LValue UB = |
3108 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3109 | |
3110 | // When composing 'distribute' with 'for' (e.g. as in 'distribute |
3111 | // parallel for') we need to use the 'distribute' |
3112 | // chunk lower and upper bounds rather than the whole loop iteration |
3113 | // space. These are parameters to the outlined function for 'parallel' |
3114 | // and we copy the bounds of the previous schedule into the |
3115 | // the current ones. |
3116 | LValue PrevLB = CGF.EmitLValue(E: LS.getPrevLowerBoundVariable()); |
3117 | LValue PrevUB = CGF.EmitLValue(E: LS.getPrevUpperBoundVariable()); |
3118 | llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar( |
3119 | lvalue: PrevLB, Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3120 | PrevLBVal = CGF.EmitScalarConversion( |
3121 | Src: PrevLBVal, SrcTy: LS.getPrevLowerBoundVariable()->getType(), |
3122 | DstTy: LS.getIterationVariable()->getType(), |
3123 | Loc: LS.getPrevLowerBoundVariable()->getExprLoc()); |
3124 | llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar( |
3125 | lvalue: PrevUB, Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3126 | PrevUBVal = CGF.EmitScalarConversion( |
3127 | Src: PrevUBVal, SrcTy: LS.getPrevUpperBoundVariable()->getType(), |
3128 | DstTy: LS.getIterationVariable()->getType(), |
3129 | Loc: LS.getPrevUpperBoundVariable()->getExprLoc()); |
3130 | |
3131 | CGF.EmitStoreOfScalar(value: PrevLBVal, lvalue: LB); |
3132 | CGF.EmitStoreOfScalar(value: PrevUBVal, lvalue: UB); |
3133 | |
3134 | return {LB, UB}; |
3135 | } |
3136 | |
3137 | /// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then |
3138 | /// we need to use the LB and UB expressions generated by the worksharing |
3139 | /// code generation support, whereas in non combined situations we would |
3140 | /// just emit 0 and the LastIteration expression |
3141 | /// This function is necessary due to the difference of the LB and UB |
3142 | /// types for the RT emission routines for 'for_static_init' and |
3143 | /// 'for_dispatch_init' |
3144 | static std::pair<llvm::Value *, llvm::Value *> |
3145 | emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF, |
3146 | const OMPExecutableDirective &S, |
3147 | Address LB, Address UB) { |
3148 | const OMPLoopDirective &LS = cast<OMPLoopDirective>(Val: S); |
3149 | const Expr *IVExpr = LS.getIterationVariable(); |
3150 | // when implementing a dynamic schedule for a 'for' combined with a |
3151 | // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop |
3152 | // is not normalized as each team only executes its own assigned |
3153 | // distribute chunk |
3154 | QualType IteratorTy = IVExpr->getType(); |
3155 | llvm::Value *LBVal = |
3156 | CGF.EmitLoadOfScalar(Addr: LB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3157 | llvm::Value *UBVal = |
3158 | CGF.EmitLoadOfScalar(Addr: UB, /*Volatile=*/false, Ty: IteratorTy, Loc: S.getBeginLoc()); |
3159 | return {LBVal, UBVal}; |
3160 | } |
3161 | |
3162 | static void emitDistributeParallelForDistributeInnerBoundParams( |
3163 | CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3164 | llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) { |
3165 | const auto &Dir = cast<OMPLoopDirective>(Val: S); |
3166 | LValue LB = |
3167 | CGF.EmitLValue(cast<DeclRefExpr>(Val: Dir.getCombinedLowerBoundVariable())); |
3168 | llvm::Value *LBCast = |
3169 | CGF.Builder.CreateIntCast(V: CGF.Builder.CreateLoad(Addr: LB.getAddress(CGF)), |
3170 | DestTy: CGF.SizeTy, /*isSigned=*/false); |
3171 | CapturedVars.push_back(Elt: LBCast); |
3172 | LValue UB = |
3173 | CGF.EmitLValue(cast<DeclRefExpr>(Val: Dir.getCombinedUpperBoundVariable())); |
3174 | |
3175 | llvm::Value *UBCast = |
3176 | CGF.Builder.CreateIntCast(V: CGF.Builder.CreateLoad(Addr: UB.getAddress(CGF)), |
3177 | DestTy: CGF.SizeTy, /*isSigned=*/false); |
3178 | CapturedVars.push_back(Elt: UBCast); |
3179 | } |
3180 | |
3181 | static void |
3182 | emitInnerParallelForWhenCombined(CodeGenFunction &CGF, |
3183 | const OMPLoopDirective &S, |
3184 | CodeGenFunction::JumpDest LoopExit) { |
3185 | auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF, |
3186 | PrePostActionTy &Action) { |
3187 | Action.Enter(CGF); |
3188 | bool HasCancel = false; |
3189 | if (!isOpenMPSimdDirective(S.getDirectiveKind())) { |
3190 | if (const auto *D = dyn_cast<OMPTeamsDistributeParallelForDirective>(Val: &S)) |
3191 | HasCancel = D->hasCancel(); |
3192 | else if (const auto *D = dyn_cast<OMPDistributeParallelForDirective>(Val: &S)) |
3193 | HasCancel = D->hasCancel(); |
3194 | else if (const auto *D = |
3195 | dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(Val: &S)) |
3196 | HasCancel = D->hasCancel(); |
3197 | } |
3198 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3199 | HasCancel); |
3200 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getPrevEnsureUpperBound(), |
3201 | CodeGenLoopBounds: emitDistributeParallelForInnerBounds, |
3202 | CGDispatchBounds: emitDistributeParallelForDispatchBounds); |
3203 | }; |
3204 | |
3205 | emitCommonOMPParallelDirective( |
3206 | CGF, S, |
3207 | isOpenMPSimdDirective(S.getDirectiveKind()) ? OMPD_for_simd : OMPD_for, |
3208 | CGInlinedWorksharingLoop, |
3209 | emitDistributeParallelForDistributeInnerBoundParams); |
3210 | } |
3211 | |
3212 | void CodeGenFunction::EmitOMPDistributeParallelForDirective( |
3213 | const OMPDistributeParallelForDirective &S) { |
3214 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3215 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3216 | IncExpr: S.getDistInc()); |
3217 | }; |
3218 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3219 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3220 | } |
3221 | |
3222 | void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective( |
3223 | const OMPDistributeParallelForSimdDirective &S) { |
3224 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3225 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
3226 | IncExpr: S.getDistInc()); |
3227 | }; |
3228 | OMPLexicalScope Scope(*this, S, OMPD_parallel); |
3229 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
3230 | } |
3231 | |
3232 | void CodeGenFunction::EmitOMPDistributeSimdDirective( |
3233 | const OMPDistributeSimdDirective &S) { |
3234 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3235 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
3236 | }; |
3237 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3238 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3239 | } |
3240 | |
3241 | void CodeGenFunction::EmitOMPTargetSimdDeviceFunction( |
3242 | CodeGenModule &CGM, StringRef ParentName, const OMPTargetSimdDirective &S) { |
3243 | // Emit SPMD target parallel for region as a standalone region. |
3244 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3245 | emitOMPSimdRegion(CGF, S, Action); |
3246 | }; |
3247 | llvm::Function *Fn; |
3248 | llvm::Constant *Addr; |
3249 | // Emit target region as a standalone region. |
3250 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
3251 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
3252 | assert(Fn && Addr && "Target device function emission failed." ); |
3253 | } |
3254 | |
3255 | void CodeGenFunction::EmitOMPTargetSimdDirective( |
3256 | const OMPTargetSimdDirective &S) { |
3257 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3258 | emitOMPSimdRegion(CGF, S, Action); |
3259 | }; |
3260 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
3261 | } |
3262 | |
3263 | namespace { |
3264 | struct ScheduleKindModifiersTy { |
3265 | OpenMPScheduleClauseKind Kind; |
3266 | OpenMPScheduleClauseModifier M1; |
3267 | OpenMPScheduleClauseModifier M2; |
3268 | ScheduleKindModifiersTy(OpenMPScheduleClauseKind Kind, |
3269 | OpenMPScheduleClauseModifier M1, |
3270 | OpenMPScheduleClauseModifier M2) |
3271 | : Kind(Kind), M1(M1), M2(M2) {} |
3272 | }; |
3273 | } // namespace |
3274 | |
3275 | bool CodeGenFunction::EmitOMPWorksharingLoop( |
3276 | const OMPLoopDirective &S, Expr *EUB, |
3277 | const CodeGenLoopBoundsTy &CodeGenLoopBounds, |
3278 | const CodeGenDispatchBoundsTy &CGDispatchBounds) { |
3279 | // Emit the loop iteration variable. |
3280 | const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable()); |
3281 | const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl()); |
3282 | EmitVarDecl(D: *IVDecl); |
3283 | |
3284 | // Emit the iterations count variable. |
3285 | // If it is not a variable, Sema decided to calculate iterations count on each |
3286 | // iteration (e.g., it is foldable into a constant). |
3287 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
3288 | EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
3289 | // Emit calculation of the iterations count. |
3290 | EmitIgnoredExpr(E: S.getCalcLastIteration()); |
3291 | } |
3292 | |
3293 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
3294 | |
3295 | bool HasLastprivateClause; |
3296 | // Check pre-condition. |
3297 | { |
3298 | OMPLoopScope PreInitScope(*this, S); |
3299 | // Skip the entire loop if we don't meet the precondition. |
3300 | // If the condition constant folds and can be elided, avoid emitting the |
3301 | // whole loop. |
3302 | bool CondConstant; |
3303 | llvm::BasicBlock *ContBlock = nullptr; |
3304 | if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
3305 | if (!CondConstant) |
3306 | return false; |
3307 | } else { |
3308 | llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then" ); |
3309 | ContBlock = createBasicBlock(name: "omp.precond.end" ); |
3310 | emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
3311 | TrueCount: getProfileCount(&S)); |
3312 | EmitBlock(BB: ThenBlock); |
3313 | incrementProfileCounter(&S); |
3314 | } |
3315 | |
3316 | RunCleanupsScope DoacrossCleanupScope(*this); |
3317 | bool Ordered = false; |
3318 | if (const auto *OrderedClause = S.getSingleClause<OMPOrderedClause>()) { |
3319 | if (OrderedClause->getNumForLoops()) |
3320 | RT.emitDoacrossInit(CGF&: *this, D: S, NumIterations: OrderedClause->getLoopNumIterations()); |
3321 | else |
3322 | Ordered = true; |
3323 | } |
3324 | |
3325 | llvm::DenseSet<const Expr *> EmittedFinals; |
3326 | emitAlignedClause(*this, S); |
3327 | bool HasLinears = EmitOMPLinearClauseInit(D: S); |
3328 | // Emit helper vars inits. |
3329 | |
3330 | std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S); |
3331 | LValue LB = Bounds.first; |
3332 | LValue UB = Bounds.second; |
3333 | LValue ST = |
3334 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable())); |
3335 | LValue IL = |
3336 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable())); |
3337 | |
3338 | // Emit 'then' code. |
3339 | { |
3340 | OMPPrivateScope LoopScope(*this); |
3341 | if (EmitOMPFirstprivateClause(S, LoopScope) || HasLinears) { |
3342 | // Emit implicit barrier to synchronize threads and avoid data races on |
3343 | // initialization of firstprivate variables and post-update of |
3344 | // lastprivate variables. |
3345 | CGM.getOpenMPRuntime().emitBarrierCall( |
3346 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3347 | /*ForceSimpleCall=*/true); |
3348 | } |
3349 | EmitOMPPrivateClause(S, LoopScope); |
3350 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion( |
3351 | *this, S, EmitLValue(E: S.getIterationVariable())); |
3352 | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
3353 | EmitOMPReductionClauseInit(S, LoopScope); |
3354 | EmitOMPPrivateLoopCounters(S, LoopScope); |
3355 | EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
3356 | (void)LoopScope.Privatize(); |
3357 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
3358 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
3359 | |
3360 | // Detect the loop schedule kind and chunk. |
3361 | const Expr *ChunkExpr = nullptr; |
3362 | OpenMPScheduleTy ScheduleKind; |
3363 | if (const auto *C = S.getSingleClause<OMPScheduleClause>()) { |
3364 | ScheduleKind.Schedule = C->getScheduleKind(); |
3365 | ScheduleKind.M1 = C->getFirstScheduleModifier(); |
3366 | ScheduleKind.M2 = C->getSecondScheduleModifier(); |
3367 | ChunkExpr = C->getChunkSize(); |
3368 | } else { |
3369 | // Default behaviour for schedule clause. |
3370 | CGM.getOpenMPRuntime().getDefaultScheduleAndChunk( |
3371 | CGF&: *this, S, ScheduleKind&: ScheduleKind.Schedule, ChunkExpr); |
3372 | } |
3373 | bool HasChunkSizeOne = false; |
3374 | llvm::Value *Chunk = nullptr; |
3375 | if (ChunkExpr) { |
3376 | Chunk = EmitScalarExpr(E: ChunkExpr); |
3377 | Chunk = EmitScalarConversion(Src: Chunk, SrcTy: ChunkExpr->getType(), |
3378 | DstTy: S.getIterationVariable()->getType(), |
3379 | Loc: S.getBeginLoc()); |
3380 | Expr::EvalResult Result; |
3381 | if (ChunkExpr->EvaluateAsInt(Result, Ctx: getContext())) { |
3382 | llvm::APSInt EvaluatedChunk = Result.Val.getInt(); |
3383 | HasChunkSizeOne = (EvaluatedChunk.getLimitedValue() == 1); |
3384 | } |
3385 | } |
3386 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
3387 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
3388 | // OpenMP 4.5, 2.7.1 Loop Construct, Description. |
3389 | // If the static schedule kind is specified or if the ordered clause is |
3390 | // specified, and if no monotonic modifier is specified, the effect will |
3391 | // be as if the monotonic modifier was specified. |
3392 | bool StaticChunkedOne = |
3393 | RT.isStaticChunked(ScheduleKind.Schedule, |
3394 | /* Chunked */ Chunk != nullptr) && |
3395 | HasChunkSizeOne && |
3396 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
3397 | bool IsMonotonic = |
3398 | Ordered || |
3399 | (ScheduleKind.Schedule == OMPC_SCHEDULE_static && |
3400 | !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || |
3401 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || |
3402 | ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || |
3403 | ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic; |
3404 | if ((RT.isStaticNonchunked(ScheduleKind: ScheduleKind.Schedule, |
3405 | /* Chunked */ Chunk != nullptr) || |
3406 | StaticChunkedOne) && |
3407 | !Ordered) { |
3408 | JumpDest LoopExit = |
3409 | getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit" )); |
3410 | emitCommonSimdLoop( |
3411 | *this, S, |
3412 | [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
3413 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3414 | CGF.EmitOMPSimdInit(D: S); |
3415 | } else if (const auto *C = S.getSingleClause<OMPOrderClause>()) { |
3416 | if (C->getKind() == OMPC_ORDER_concurrent) |
3417 | CGF.LoopStack.setParallel(/*Enable=*/true); |
3418 | } |
3419 | }, |
3420 | [IVSize, IVSigned, Ordered, IL, LB, UB, ST, StaticChunkedOne, Chunk, |
3421 | &S, ScheduleKind, LoopExit, |
3422 | &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
3423 | // OpenMP [2.7.1, Loop Construct, Description, table 2-1] |
3424 | // When no chunk_size is specified, the iteration space is divided |
3425 | // into chunks that are approximately equal in size, and at most |
3426 | // one chunk is distributed to each thread. Note that the size of |
3427 | // the chunks is unspecified in this case. |
3428 | CGOpenMPRuntime::StaticRTInput StaticInit( |
3429 | IVSize, IVSigned, Ordered, IL.getAddress(CGF), |
3430 | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF), |
3431 | StaticChunkedOne ? Chunk : nullptr); |
3432 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
3433 | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, |
3434 | StaticInit); |
3435 | // UB = min(UB, GlobalUB); |
3436 | if (!StaticChunkedOne) |
3437 | CGF.EmitIgnoredExpr(E: S.getEnsureUpperBound()); |
3438 | // IV = LB; |
3439 | CGF.EmitIgnoredExpr(E: S.getInit()); |
3440 | // For unchunked static schedule generate: |
3441 | // |
3442 | // while (idx <= UB) { |
3443 | // BODY; |
3444 | // ++idx; |
3445 | // } |
3446 | // |
3447 | // For static schedule with chunk one: |
3448 | // |
3449 | // while (IV <= PrevUB) { |
3450 | // BODY; |
3451 | // IV += ST; |
3452 | // } |
3453 | CGF.EmitOMPInnerLoop( |
3454 | S, LoopScope.requiresCleanups(), |
3455 | StaticChunkedOne ? S.getCombinedParForInDistCond() |
3456 | : S.getCond(), |
3457 | StaticChunkedOne ? S.getDistInc() : S.getInc(), |
3458 | [&S, LoopExit](CodeGenFunction &CGF) { |
3459 | emitOMPLoopBodyWithStopPoint(CGF, S, LoopExit); |
3460 | }, |
3461 | [](CodeGenFunction &) {}); |
3462 | }); |
3463 | EmitBlock(BB: LoopExit.getBlock()); |
3464 | // Tell the runtime we are done. |
3465 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
3466 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
3467 | OMPD_for); |
3468 | }; |
3469 | OMPCancelStack.emitExit(*this, S.getDirectiveKind(), CodeGen); |
3470 | } else { |
3471 | // Emit the outer loop, which requests its work chunk [LB..UB] from |
3472 | // runtime and runs the inner loop to process it. |
3473 | OMPLoopArguments LoopArguments( |
3474 | LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this), |
3475 | IL.getAddress(CGF&: *this), Chunk, EUB); |
3476 | LoopArguments.DKind = OMPD_for; |
3477 | EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered, |
3478 | LoopArgs: LoopArguments, CGDispatchBounds); |
3479 | } |
3480 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
3481 | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3482 | return CGF.Builder.CreateIsNotNull( |
3483 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3484 | }); |
3485 | } |
3486 | EmitOMPReductionClauseFinal( |
3487 | S, /*ReductionKind=*/isOpenMPSimdDirective(S.getDirectiveKind()) |
3488 | ? /*Parallel and Simd*/ OMPD_parallel_for_simd |
3489 | : /*Parallel only*/ OMPD_parallel); |
3490 | // Emit post-update of the reduction variables if IsLastIter != 0. |
3491 | emitPostUpdateForReductionClause( |
3492 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
3493 | return CGF.Builder.CreateIsNotNull( |
3494 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3495 | }); |
3496 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
3497 | if (HasLastprivateClause) |
3498 | EmitOMPLastprivateClauseFinal( |
3499 | S, isOpenMPSimdDirective(S.getDirectiveKind()), |
3500 | Builder.CreateIsNotNull(EmitLoadOfScalar(IL, S.getBeginLoc()))); |
3501 | LoopScope.restoreMap(); |
3502 | EmitOMPLinearClauseFinal(S, [IL, &S](CodeGenFunction &CGF) { |
3503 | return CGF.Builder.CreateIsNotNull( |
3504 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
3505 | }); |
3506 | } |
3507 | DoacrossCleanupScope.ForceCleanup(); |
3508 | // We're now done with the loop, so jump to the continuation block. |
3509 | if (ContBlock) { |
3510 | EmitBranch(Block: ContBlock); |
3511 | EmitBlock(BB: ContBlock, /*IsFinished=*/true); |
3512 | } |
3513 | } |
3514 | return HasLastprivateClause; |
3515 | } |
3516 | |
3517 | /// The following two functions generate expressions for the loop lower |
3518 | /// and upper bounds in case of static and dynamic (dispatch) schedule |
3519 | /// of the associated 'for' or 'distribute' loop. |
3520 | static std::pair<LValue, LValue> |
3521 | emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
3522 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3523 | LValue LB = |
3524 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getLowerBoundVariable())); |
3525 | LValue UB = |
3526 | EmitOMPHelperVar(CGF, Helper: cast<DeclRefExpr>(Val: LS.getUpperBoundVariable())); |
3527 | return {LB, UB}; |
3528 | } |
3529 | |
3530 | /// When dealing with dispatch schedules (e.g. dynamic, guided) we do not |
3531 | /// consider the lower and upper bound expressions generated by the |
3532 | /// worksharing loop support, but we use 0 and the iteration space size as |
3533 | /// constants |
3534 | static std::pair<llvm::Value *, llvm::Value *> |
3535 | emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S, |
3536 | Address LB, Address UB) { |
3537 | const auto &LS = cast<OMPLoopDirective>(Val: S); |
3538 | const Expr *IVExpr = LS.getIterationVariable(); |
3539 | const unsigned IVSize = CGF.getContext().getTypeSize(T: IVExpr->getType()); |
3540 | llvm::Value *LBVal = CGF.Builder.getIntN(N: IVSize, C: 0); |
3541 | llvm::Value *UBVal = CGF.EmitScalarExpr(E: LS.getLastIteration()); |
3542 | return {LBVal, UBVal}; |
3543 | } |
3544 | |
3545 | /// Emits internal temp array declarations for the directive with inscan |
3546 | /// reductions. |
3547 | /// The code is the following: |
3548 | /// \code |
3549 | /// size num_iters = <num_iters>; |
3550 | /// <type> buffer[num_iters]; |
3551 | /// \endcode |
3552 | static void emitScanBasedDirectiveDecls( |
3553 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3554 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3555 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3556 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3557 | SmallVector<const Expr *, 4> Shareds; |
3558 | SmallVector<const Expr *, 4> Privates; |
3559 | SmallVector<const Expr *, 4> ReductionOps; |
3560 | SmallVector<const Expr *, 4> CopyArrayTemps; |
3561 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3562 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3563 | "Only inscan reductions are expected." ); |
3564 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
3565 | Privates.append(C->privates().begin(), C->privates().end()); |
3566 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
3567 | CopyArrayTemps.append(C->copy_array_temps().begin(), |
3568 | C->copy_array_temps().end()); |
3569 | } |
3570 | { |
3571 | // Emit buffers for each reduction variables. |
3572 | // ReductionCodeGen is required to emit correctly the code for array |
3573 | // reductions. |
3574 | ReductionCodeGen RedCG(Shareds, Shareds, Privates, ReductionOps); |
3575 | unsigned Count = 0; |
3576 | auto *ITA = CopyArrayTemps.begin(); |
3577 | for (const Expr *IRef : Privates) { |
3578 | const auto *PrivateVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IRef)->getDecl()); |
3579 | // Emit variably modified arrays, used for arrays/array sections |
3580 | // reductions. |
3581 | if (PrivateVD->getType()->isVariablyModifiedType()) { |
3582 | RedCG.emitSharedOrigLValue(CGF, N: Count); |
3583 | RedCG.emitAggregateType(CGF, N: Count); |
3584 | } |
3585 | CodeGenFunction::OpaqueValueMapping DimMapping( |
3586 | CGF, |
3587 | cast<OpaqueValueExpr>( |
3588 | cast<VariableArrayType>((*ITA)->getType()->getAsArrayTypeUnsafe()) |
3589 | ->getSizeExpr()), |
3590 | RValue::get(V: OMPScanNumIterations)); |
3591 | // Emit temp buffer. |
3592 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ITA)->getDecl())); |
3593 | ++ITA; |
3594 | ++Count; |
3595 | } |
3596 | } |
3597 | } |
3598 | |
3599 | /// Copies final inscan reductions values to the original variables. |
3600 | /// The code is the following: |
3601 | /// \code |
3602 | /// <orig_var> = buffer[num_iters-1]; |
3603 | /// \endcode |
3604 | static void emitScanBasedDirectiveFinals( |
3605 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3606 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen) { |
3607 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3608 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3609 | SmallVector<const Expr *, 4> Shareds; |
3610 | SmallVector<const Expr *, 4> LHSs; |
3611 | SmallVector<const Expr *, 4> RHSs; |
3612 | SmallVector<const Expr *, 4> Privates; |
3613 | SmallVector<const Expr *, 4> CopyOps; |
3614 | SmallVector<const Expr *, 4> CopyArrayElems; |
3615 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3616 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3617 | "Only inscan reductions are expected." ); |
3618 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
3619 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3620 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3621 | Privates.append(C->privates().begin(), C->privates().end()); |
3622 | CopyOps.append(C->copy_ops().begin(), C->copy_ops().end()); |
3623 | CopyArrayElems.append(C->copy_array_elems().begin(), |
3624 | C->copy_array_elems().end()); |
3625 | } |
3626 | // Create temp var and copy LHS value to this temp value. |
3627 | // LHS = TMP[LastIter]; |
3628 | llvm::Value *OMPLast = CGF.Builder.CreateNSWSub( |
3629 | LHS: OMPScanNumIterations, |
3630 | RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1, /*isSigned=*/IsSigned: false)); |
3631 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
3632 | const Expr *PrivateExpr = Privates[I]; |
3633 | const Expr *OrigExpr = Shareds[I]; |
3634 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
3635 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3636 | CGF, |
3637 | cast<OpaqueValueExpr>( |
3638 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3639 | RValue::get(V: OMPLast)); |
3640 | LValue DestLVal = CGF.EmitLValue(E: OrigExpr); |
3641 | LValue SrcLVal = CGF.EmitLValue(E: CopyArrayElem); |
3642 | CGF.EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF), |
3643 | SrcAddr: SrcLVal.getAddress(CGF), |
3644 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
3645 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
3646 | Copy: CopyOps[I]); |
3647 | } |
3648 | } |
3649 | |
3650 | /// Emits the code for the directive with inscan reductions. |
3651 | /// The code is the following: |
3652 | /// \code |
3653 | /// #pragma omp ... |
3654 | /// for (i: 0..<num_iters>) { |
3655 | /// <input phase>; |
3656 | /// buffer[i] = red; |
3657 | /// } |
3658 | /// #pragma omp master // in parallel region |
3659 | /// for (int k = 0; k != ceil(log2(num_iters)); ++k) |
3660 | /// for (size cnt = last_iter; cnt >= pow(2, k); --k) |
3661 | /// buffer[i] op= buffer[i-pow(2,k)]; |
3662 | /// #pragma omp barrier // in parallel region |
3663 | /// #pragma omp ... |
3664 | /// for (0..<num_iters>) { |
3665 | /// red = InclusiveScan ? buffer[i] : buffer[i-1]; |
3666 | /// <scan phase>; |
3667 | /// } |
3668 | /// \endcode |
3669 | static void emitScanBasedDirective( |
3670 | CodeGenFunction &CGF, const OMPLoopDirective &S, |
3671 | llvm::function_ref<llvm::Value *(CodeGenFunction &)> NumIteratorsGen, |
3672 | llvm::function_ref<void(CodeGenFunction &)> FirstGen, |
3673 | llvm::function_ref<void(CodeGenFunction &)> SecondGen) { |
3674 | llvm::Value *OMPScanNumIterations = CGF.Builder.CreateIntCast( |
3675 | V: NumIteratorsGen(CGF), DestTy: CGF.SizeTy, /*isSigned=*/false); |
3676 | SmallVector<const Expr *, 4> Privates; |
3677 | SmallVector<const Expr *, 4> ReductionOps; |
3678 | SmallVector<const Expr *, 4> LHSs; |
3679 | SmallVector<const Expr *, 4> RHSs; |
3680 | SmallVector<const Expr *, 4> CopyArrayElems; |
3681 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
3682 | assert(C->getModifier() == OMPC_REDUCTION_inscan && |
3683 | "Only inscan reductions are expected." ); |
3684 | Privates.append(C->privates().begin(), C->privates().end()); |
3685 | ReductionOps.append(C->reduction_ops().begin(), C->reduction_ops().end()); |
3686 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
3687 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
3688 | CopyArrayElems.append(C->copy_array_elems().begin(), |
3689 | C->copy_array_elems().end()); |
3690 | } |
3691 | CodeGenFunction::ParentLoopDirectiveForScanRegion ScanRegion(CGF, S); |
3692 | { |
3693 | // Emit loop with input phase: |
3694 | // #pragma omp ... |
3695 | // for (i: 0..<num_iters>) { |
3696 | // <input phase>; |
3697 | // buffer[i] = red; |
3698 | // } |
3699 | CGF.OMPFirstScanLoop = true; |
3700 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3701 | FirstGen(CGF); |
3702 | } |
3703 | // #pragma omp barrier // in parallel region |
3704 | auto &&CodeGen = [&S, OMPScanNumIterations, &LHSs, &RHSs, &CopyArrayElems, |
3705 | &ReductionOps, |
3706 | &Privates](CodeGenFunction &CGF, PrePostActionTy &Action) { |
3707 | Action.Enter(CGF); |
3708 | // Emit prefix reduction: |
3709 | // #pragma omp master // in parallel region |
3710 | // for (int k = 0; k <= ceil(log2(n)); ++k) |
3711 | llvm::BasicBlock *InputBB = CGF.Builder.GetInsertBlock(); |
3712 | llvm::BasicBlock *LoopBB = CGF.createBasicBlock(name: "omp.outer.log.scan.body" ); |
3713 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: "omp.outer.log.scan.exit" ); |
3714 | llvm::Function *F = |
3715 | CGF.CGM.getIntrinsic(llvm::Intrinsic::log2, CGF.DoubleTy); |
3716 | llvm::Value *Arg = |
3717 | CGF.Builder.CreateUIToFP(V: OMPScanNumIterations, DestTy: CGF.DoubleTy); |
3718 | llvm::Value *LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: Arg); |
3719 | F = CGF.CGM.getIntrinsic(llvm::Intrinsic::ceil, CGF.DoubleTy); |
3720 | LogVal = CGF.EmitNounwindRuntimeCall(callee: F, args: LogVal); |
3721 | LogVal = CGF.Builder.CreateFPToUI(V: LogVal, DestTy: CGF.IntTy); |
3722 | llvm::Value *NMin1 = CGF.Builder.CreateNUWSub( |
3723 | LHS: OMPScanNumIterations, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1)); |
3724 | auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getBeginLoc()); |
3725 | CGF.EmitBlock(BB: LoopBB); |
3726 | auto *Counter = CGF.Builder.CreatePHI(Ty: CGF.IntTy, NumReservedValues: 2); |
3727 | // size pow2k = 1; |
3728 | auto *Pow2K = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2); |
3729 | Counter->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 0), BB: InputBB); |
3730 | Pow2K->addIncoming(V: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1), BB: InputBB); |
3731 | // for (size i = n - 1; i >= 2 ^ k; --i) |
3732 | // tmp[i] op= tmp[i-pow2k]; |
3733 | llvm::BasicBlock *InnerLoopBB = |
3734 | CGF.createBasicBlock(name: "omp.inner.log.scan.body" ); |
3735 | llvm::BasicBlock *InnerExitBB = |
3736 | CGF.createBasicBlock(name: "omp.inner.log.scan.exit" ); |
3737 | llvm::Value *CmpI = CGF.Builder.CreateICmpUGE(LHS: NMin1, RHS: Pow2K); |
3738 | CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB); |
3739 | CGF.EmitBlock(BB: InnerLoopBB); |
3740 | auto *IVal = CGF.Builder.CreatePHI(Ty: CGF.SizeTy, NumReservedValues: 2); |
3741 | IVal->addIncoming(V: NMin1, BB: LoopBB); |
3742 | { |
3743 | CodeGenFunction::OMPPrivateScope PrivScope(CGF); |
3744 | auto *ILHS = LHSs.begin(); |
3745 | auto *IRHS = RHSs.begin(); |
3746 | for (const Expr *CopyArrayElem : CopyArrayElems) { |
3747 | const auto *LHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ILHS)->getDecl()); |
3748 | const auto *RHSVD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *IRHS)->getDecl()); |
3749 | Address LHSAddr = Address::invalid(); |
3750 | { |
3751 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3752 | CGF, |
3753 | cast<OpaqueValueExpr>( |
3754 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3755 | RValue::get(V: IVal)); |
3756 | LHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(CGF); |
3757 | } |
3758 | PrivScope.addPrivate(LocalVD: LHSVD, Addr: LHSAddr); |
3759 | Address RHSAddr = Address::invalid(); |
3760 | { |
3761 | llvm::Value *OffsetIVal = CGF.Builder.CreateNUWSub(LHS: IVal, RHS: Pow2K); |
3762 | CodeGenFunction::OpaqueValueMapping IdxMapping( |
3763 | CGF, |
3764 | cast<OpaqueValueExpr>( |
3765 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
3766 | RValue::get(V: OffsetIVal)); |
3767 | RHSAddr = CGF.EmitLValue(E: CopyArrayElem).getAddress(CGF); |
3768 | } |
3769 | PrivScope.addPrivate(LocalVD: RHSVD, Addr: RHSAddr); |
3770 | ++ILHS; |
3771 | ++IRHS; |
3772 | } |
3773 | PrivScope.Privatize(); |
3774 | CGF.CGM.getOpenMPRuntime().emitReduction( |
3775 | CGF, S.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
3776 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_unknown}); |
3777 | } |
3778 | llvm::Value *NextIVal = |
3779 | CGF.Builder.CreateNUWSub(LHS: IVal, RHS: llvm::ConstantInt::get(Ty: CGF.SizeTy, V: 1)); |
3780 | IVal->addIncoming(V: NextIVal, BB: CGF.Builder.GetInsertBlock()); |
3781 | CmpI = CGF.Builder.CreateICmpUGE(LHS: NextIVal, RHS: Pow2K); |
3782 | CGF.Builder.CreateCondBr(Cond: CmpI, True: InnerLoopBB, False: InnerExitBB); |
3783 | CGF.EmitBlock(BB: InnerExitBB); |
3784 | llvm::Value *Next = |
3785 | CGF.Builder.CreateNUWAdd(LHS: Counter, RHS: llvm::ConstantInt::get(Ty: CGF.IntTy, V: 1)); |
3786 | Counter->addIncoming(V: Next, BB: CGF.Builder.GetInsertBlock()); |
3787 | // pow2k <<= 1; |
3788 | llvm::Value *NextPow2K = |
3789 | CGF.Builder.CreateShl(LHS: Pow2K, RHS: 1, Name: "" , /*HasNUW=*/true); |
3790 | Pow2K->addIncoming(V: NextPow2K, BB: CGF.Builder.GetInsertBlock()); |
3791 | llvm::Value *Cmp = CGF.Builder.CreateICmpNE(LHS: Next, RHS: LogVal); |
3792 | CGF.Builder.CreateCondBr(Cond: Cmp, True: LoopBB, False: ExitBB); |
3793 | auto DL1 = ApplyDebugLocation::CreateDefaultArtificial(CGF, TemporaryLocation: S.getEndLoc()); |
3794 | CGF.EmitBlock(BB: ExitBB); |
3795 | }; |
3796 | if (isOpenMPParallelDirective(S.getDirectiveKind())) { |
3797 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
3798 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
3799 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
3800 | /*ForceSimpleCall=*/true); |
3801 | } else { |
3802 | RegionCodeGenTy RCG(CodeGen); |
3803 | RCG(CGF); |
3804 | } |
3805 | |
3806 | CGF.OMPFirstScanLoop = false; |
3807 | SecondGen(CGF); |
3808 | } |
3809 | |
3810 | static bool emitWorksharingDirective(CodeGenFunction &CGF, |
3811 | const OMPLoopDirective &S, |
3812 | bool HasCancel) { |
3813 | bool HasLastprivates; |
3814 | if (llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
3815 | [](const OMPReductionClause *C) { |
3816 | return C->getModifier() == OMPC_REDUCTION_inscan; |
3817 | })) { |
3818 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
3819 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
3820 | OMPLoopScope LoopScope(CGF, S); |
3821 | return CGF.EmitScalarExpr(E: S.getNumIterations()); |
3822 | }; |
3823 | const auto &&FirstGen = [&S, HasCancel](CodeGenFunction &CGF) { |
3824 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3825 | CGF, S.getDirectiveKind(), HasCancel); |
3826 | (void)CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3827 | CodeGenLoopBounds: emitForLoopBounds, |
3828 | CGDispatchBounds: emitDispatchForLoopBounds); |
3829 | // Emit an implicit barrier at the end. |
3830 | CGF.CGM.getOpenMPRuntime().emitBarrierCall(CGF, S.getBeginLoc(), |
3831 | OMPD_for); |
3832 | }; |
3833 | const auto &&SecondGen = [&S, HasCancel, |
3834 | &HasLastprivates](CodeGenFunction &CGF) { |
3835 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
3836 | CGF, S.getDirectiveKind(), HasCancel); |
3837 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3838 | CodeGenLoopBounds: emitForLoopBounds, |
3839 | CGDispatchBounds: emitDispatchForLoopBounds); |
3840 | }; |
3841 | if (!isOpenMPParallelDirective(S.getDirectiveKind())) |
3842 | emitScanBasedDirectiveDecls(CGF, S, NumIteratorsGen); |
3843 | emitScanBasedDirective(CGF, S, NumIteratorsGen, FirstGen, SecondGen); |
3844 | if (!isOpenMPParallelDirective(S.getDirectiveKind())) |
3845 | emitScanBasedDirectiveFinals(CGF, S, NumIteratorsGen); |
3846 | } else { |
3847 | CodeGenFunction::OMPCancelStackRAII CancelRegion(CGF, S.getDirectiveKind(), |
3848 | HasCancel); |
3849 | HasLastprivates = CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), |
3850 | CodeGenLoopBounds: emitForLoopBounds, |
3851 | CGDispatchBounds: emitDispatchForLoopBounds); |
3852 | } |
3853 | return HasLastprivates; |
3854 | } |
3855 | |
3856 | static bool isSupportedByOpenMPIRBuilder(const OMPForDirective &S) { |
3857 | if (S.hasCancel()) |
3858 | return false; |
3859 | for (OMPClause *C : S.clauses()) { |
3860 | if (isa<OMPNowaitClause>(C)) |
3861 | continue; |
3862 | |
3863 | if (auto *SC = dyn_cast<OMPScheduleClause>(C)) { |
3864 | if (SC->getFirstScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3865 | return false; |
3866 | if (SC->getSecondScheduleModifier() != OMPC_SCHEDULE_MODIFIER_unknown) |
3867 | return false; |
3868 | switch (SC->getScheduleKind()) { |
3869 | case OMPC_SCHEDULE_auto: |
3870 | case OMPC_SCHEDULE_dynamic: |
3871 | case OMPC_SCHEDULE_runtime: |
3872 | case OMPC_SCHEDULE_guided: |
3873 | case OMPC_SCHEDULE_static: |
3874 | continue; |
3875 | case OMPC_SCHEDULE_unknown: |
3876 | return false; |
3877 | } |
3878 | } |
3879 | |
3880 | return false; |
3881 | } |
3882 | |
3883 | return true; |
3884 | } |
3885 | |
3886 | static llvm::omp::ScheduleKind |
3887 | convertClauseKindToSchedKind(OpenMPScheduleClauseKind ScheduleClauseKind) { |
3888 | switch (ScheduleClauseKind) { |
3889 | case OMPC_SCHEDULE_unknown: |
3890 | return llvm::omp::OMP_SCHEDULE_Default; |
3891 | case OMPC_SCHEDULE_auto: |
3892 | return llvm::omp::OMP_SCHEDULE_Auto; |
3893 | case OMPC_SCHEDULE_dynamic: |
3894 | return llvm::omp::OMP_SCHEDULE_Dynamic; |
3895 | case OMPC_SCHEDULE_guided: |
3896 | return llvm::omp::OMP_SCHEDULE_Guided; |
3897 | case OMPC_SCHEDULE_runtime: |
3898 | return llvm::omp::OMP_SCHEDULE_Runtime; |
3899 | case OMPC_SCHEDULE_static: |
3900 | return llvm::omp::OMP_SCHEDULE_Static; |
3901 | } |
3902 | llvm_unreachable("Unhandled schedule kind" ); |
3903 | } |
3904 | |
3905 | void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) { |
3906 | bool HasLastprivates = false; |
3907 | bool UseOMPIRBuilder = |
3908 | CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(S); |
3909 | auto &&CodeGen = [this, &S, &HasLastprivates, |
3910 | UseOMPIRBuilder](CodeGenFunction &CGF, PrePostActionTy &) { |
3911 | // Use the OpenMPIRBuilder if enabled. |
3912 | if (UseOMPIRBuilder) { |
3913 | bool NeedsBarrier = !S.getSingleClause<OMPNowaitClause>(); |
3914 | |
3915 | llvm::omp::ScheduleKind SchedKind = llvm::omp::OMP_SCHEDULE_Default; |
3916 | llvm::Value *ChunkSize = nullptr; |
3917 | if (auto *SchedClause = S.getSingleClause<OMPScheduleClause>()) { |
3918 | SchedKind = |
3919 | convertClauseKindToSchedKind(SchedClause->getScheduleKind()); |
3920 | if (const Expr *ChunkSizeExpr = SchedClause->getChunkSize()) |
3921 | ChunkSize = EmitScalarExpr(E: ChunkSizeExpr); |
3922 | } |
3923 | |
3924 | // Emit the associated statement and get its loop representation. |
3925 | const Stmt *Inner = S.getRawStmt(); |
3926 | llvm::CanonicalLoopInfo *CLI = |
3927 | EmitOMPCollapsedCanonicalLoopNest(S: Inner, Depth: 1); |
3928 | |
3929 | llvm::OpenMPIRBuilder &OMPBuilder = |
3930 | CGM.getOpenMPRuntime().getOMPBuilder(); |
3931 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
3932 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
3933 | OMPBuilder.applyWorkshareLoop( |
3934 | Builder.getCurrentDebugLocation(), CLI, AllocaIP, NeedsBarrier, |
3935 | SchedKind, ChunkSize, /*HasSimdModifier=*/false, |
3936 | /*HasMonotonicModifier=*/false, /*HasNonmonotonicModifier=*/false, |
3937 | /*HasOrderedClause=*/false); |
3938 | return; |
3939 | } |
3940 | |
3941 | HasLastprivates = emitWorksharingDirective(CGF, S, S.hasCancel()); |
3942 | }; |
3943 | { |
3944 | auto LPCRegion = |
3945 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3946 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3947 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_for, CodeGen, |
3948 | S.hasCancel()); |
3949 | } |
3950 | |
3951 | if (!UseOMPIRBuilder) { |
3952 | // Emit an implicit barrier at the end. |
3953 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3954 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3955 | } |
3956 | // Check for outer lastprivate conditional update. |
3957 | checkForLastprivateConditionalUpdate(*this, S); |
3958 | } |
3959 | |
3960 | void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) { |
3961 | bool HasLastprivates = false; |
3962 | auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF, |
3963 | PrePostActionTy &) { |
3964 | HasLastprivates = emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
3965 | }; |
3966 | { |
3967 | auto LPCRegion = |
3968 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
3969 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
3970 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen); |
3971 | } |
3972 | |
3973 | // Emit an implicit barrier at the end. |
3974 | if (!S.getSingleClause<OMPNowaitClause>() || HasLastprivates) |
3975 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_for); |
3976 | // Check for outer lastprivate conditional update. |
3977 | checkForLastprivateConditionalUpdate(*this, S); |
3978 | } |
3979 | |
3980 | static LValue createSectionLVal(CodeGenFunction &CGF, QualType Ty, |
3981 | const Twine &Name, |
3982 | llvm::Value *Init = nullptr) { |
3983 | LValue LVal = CGF.MakeAddrLValue(Addr: CGF.CreateMemTemp(T: Ty, Name), T: Ty); |
3984 | if (Init) |
3985 | CGF.EmitStoreThroughLValue(Src: RValue::get(V: Init), Dst: LVal, /*isInit*/ true); |
3986 | return LVal; |
3987 | } |
3988 | |
3989 | void CodeGenFunction::EmitSections(const OMPExecutableDirective &S) { |
3990 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
3991 | const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt); |
3992 | bool HasLastprivates = false; |
3993 | auto &&CodeGen = [&S, CapturedStmt, CS, |
3994 | &HasLastprivates](CodeGenFunction &CGF, PrePostActionTy &) { |
3995 | const ASTContext &C = CGF.getContext(); |
3996 | QualType KmpInt32Ty = |
3997 | C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1); |
3998 | // Emit helper vars inits. |
3999 | LValue LB = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.lb." , |
4000 | Init: CGF.Builder.getInt32(C: 0)); |
4001 | llvm::ConstantInt *GlobalUBVal = CS != nullptr |
4002 | ? CGF.Builder.getInt32(C: CS->size() - 1) |
4003 | : CGF.Builder.getInt32(C: 0); |
4004 | LValue UB = |
4005 | createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.ub." , Init: GlobalUBVal); |
4006 | LValue ST = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.st." , |
4007 | Init: CGF.Builder.getInt32(C: 1)); |
4008 | LValue IL = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.il." , |
4009 | Init: CGF.Builder.getInt32(C: 0)); |
4010 | // Loop counter. |
4011 | LValue IV = createSectionLVal(CGF, Ty: KmpInt32Ty, Name: ".omp.sections.iv." ); |
4012 | OpaqueValueExpr IVRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4013 | CodeGenFunction::OpaqueValueMapping OpaqueIV(CGF, &IVRefExpr, IV); |
4014 | OpaqueValueExpr UBRefExpr(S.getBeginLoc(), KmpInt32Ty, VK_LValue); |
4015 | CodeGenFunction::OpaqueValueMapping OpaqueUB(CGF, &UBRefExpr, UB); |
4016 | // Generate condition for loop. |
4017 | BinaryOperator *Cond = BinaryOperator::Create( |
4018 | C, lhs: &IVRefExpr, rhs: &UBRefExpr, opc: BO_LE, ResTy: C.BoolTy, VK: VK_PRValue, OK: OK_Ordinary, |
4019 | opLoc: S.getBeginLoc(), FPFeatures: FPOptionsOverride()); |
4020 | // Increment for loop counter. |
4021 | UnaryOperator *Inc = UnaryOperator::Create( |
4022 | C, &IVRefExpr, UO_PreInc, KmpInt32Ty, VK_PRValue, OK_Ordinary, |
4023 | S.getBeginLoc(), true, FPOptionsOverride()); |
4024 | auto &&BodyGen = [CapturedStmt, CS, &S, &IV](CodeGenFunction &CGF) { |
4025 | // Iterate through all sections and emit a switch construct: |
4026 | // switch (IV) { |
4027 | // case 0: |
4028 | // <SectionStmt[0]>; |
4029 | // break; |
4030 | // ... |
4031 | // case <NumSection> - 1: |
4032 | // <SectionStmt[<NumSection> - 1]>; |
4033 | // break; |
4034 | // } |
4035 | // .omp.sections.exit: |
4036 | llvm::BasicBlock *ExitBB = CGF.createBasicBlock(name: ".omp.sections.exit" ); |
4037 | llvm::SwitchInst *SwitchStmt = |
4038 | CGF.Builder.CreateSwitch(V: CGF.EmitLoadOfScalar(lvalue: IV, Loc: S.getBeginLoc()), |
4039 | Dest: ExitBB, NumCases: CS == nullptr ? 1 : CS->size()); |
4040 | if (CS) { |
4041 | unsigned CaseNumber = 0; |
4042 | for (const Stmt *SubStmt : CS->children()) { |
4043 | auto CaseBB = CGF.createBasicBlock(name: ".omp.sections.case" ); |
4044 | CGF.EmitBlock(BB: CaseBB); |
4045 | SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: CaseNumber), Dest: CaseBB); |
4046 | CGF.EmitStmt(S: SubStmt); |
4047 | CGF.EmitBranch(Block: ExitBB); |
4048 | ++CaseNumber; |
4049 | } |
4050 | } else { |
4051 | llvm::BasicBlock *CaseBB = CGF.createBasicBlock(name: ".omp.sections.case" ); |
4052 | CGF.EmitBlock(BB: CaseBB); |
4053 | SwitchStmt->addCase(OnVal: CGF.Builder.getInt32(C: 0), Dest: CaseBB); |
4054 | CGF.EmitStmt(S: CapturedStmt); |
4055 | CGF.EmitBranch(Block: ExitBB); |
4056 | } |
4057 | CGF.EmitBlock(BB: ExitBB, /*IsFinished=*/true); |
4058 | }; |
4059 | |
4060 | CodeGenFunction::OMPPrivateScope LoopScope(CGF); |
4061 | if (CGF.EmitOMPFirstprivateClause(D: S, PrivateScope&: LoopScope)) { |
4062 | // Emit implicit barrier to synchronize threads and avoid data races on |
4063 | // initialization of firstprivate variables and post-update of lastprivate |
4064 | // variables. |
4065 | CGF.CGM.getOpenMPRuntime().emitBarrierCall( |
4066 | CGF, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
4067 | /*ForceSimpleCall=*/true); |
4068 | } |
4069 | CGF.EmitOMPPrivateClause(D: S, PrivateScope&: LoopScope); |
4070 | CGOpenMPRuntime::LastprivateConditionalRAII LPCRegion(CGF, S, IV); |
4071 | HasLastprivates = CGF.EmitOMPLastprivateClauseInit(D: S, PrivateScope&: LoopScope); |
4072 | CGF.EmitOMPReductionClauseInit(D: S, PrivateScope&: LoopScope); |
4073 | (void)LoopScope.Privatize(); |
4074 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
4075 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, D: S); |
4076 | |
4077 | // Emit static non-chunked loop. |
4078 | OpenMPScheduleTy ScheduleKind; |
4079 | ScheduleKind.Schedule = OMPC_SCHEDULE_static; |
4080 | CGOpenMPRuntime::StaticRTInput StaticInit( |
4081 | /*IVSize=*/32, /*IVSigned=*/true, /*Ordered=*/false, IL.getAddress(CGF), |
4082 | LB.getAddress(CGF), UB.getAddress(CGF), ST.getAddress(CGF)); |
4083 | CGF.CGM.getOpenMPRuntime().emitForStaticInit( |
4084 | CGF, S.getBeginLoc(), S.getDirectiveKind(), ScheduleKind, StaticInit); |
4085 | // UB = min(UB, GlobalUB); |
4086 | llvm::Value *UBVal = CGF.EmitLoadOfScalar(lvalue: UB, Loc: S.getBeginLoc()); |
4087 | llvm::Value *MinUBGlobalUB = CGF.Builder.CreateSelect( |
4088 | C: CGF.Builder.CreateICmpSLT(LHS: UBVal, RHS: GlobalUBVal), True: UBVal, False: GlobalUBVal); |
4089 | CGF.EmitStoreOfScalar(value: MinUBGlobalUB, lvalue: UB); |
4090 | // IV = LB; |
4091 | CGF.EmitStoreOfScalar(value: CGF.EmitLoadOfScalar(lvalue: LB, Loc: S.getBeginLoc()), lvalue: IV); |
4092 | // while (idx <= UB) { BODY; ++idx; } |
4093 | CGF.EmitOMPInnerLoop(S, /*RequiresCleanup=*/false, Cond, Inc, BodyGen, |
4094 | [](CodeGenFunction &) {}); |
4095 | // Tell the runtime we are done. |
4096 | auto &&CodeGen = [&S](CodeGenFunction &CGF) { |
4097 | CGF.CGM.getOpenMPRuntime().emitForStaticFinish(CGF, S.getEndLoc(), |
4098 | OMPD_sections); |
4099 | }; |
4100 | CGF.OMPCancelStack.emitExit(CGF, S.getDirectiveKind(), CodeGen); |
4101 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4102 | // Emit post-update of the reduction variables if IsLastIter != 0. |
4103 | emitPostUpdateForReductionClause(CGF, D: S, CondGen: [IL, &S](CodeGenFunction &CGF) { |
4104 | return CGF.Builder.CreateIsNotNull( |
4105 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc())); |
4106 | }); |
4107 | |
4108 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
4109 | if (HasLastprivates) |
4110 | CGF.EmitOMPLastprivateClauseFinal( |
4111 | D: S, /*NoFinals=*/false, |
4112 | IsLastIterCond: CGF.Builder.CreateIsNotNull( |
4113 | Arg: CGF.EmitLoadOfScalar(lvalue: IL, Loc: S.getBeginLoc()))); |
4114 | }; |
4115 | |
4116 | bool HasCancel = false; |
4117 | if (auto *OSD = dyn_cast<OMPSectionsDirective>(Val: &S)) |
4118 | HasCancel = OSD->hasCancel(); |
4119 | else if (auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(Val: &S)) |
4120 | HasCancel = OPSD->hasCancel(); |
4121 | OMPCancelStackRAII CancelRegion(*this, S.getDirectiveKind(), HasCancel); |
4122 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_sections, CodeGen, |
4123 | HasCancel); |
4124 | // Emit barrier for lastprivates only if 'sections' directive has 'nowait' |
4125 | // clause. Otherwise the barrier will be generated by the codegen for the |
4126 | // directive. |
4127 | if (HasLastprivates && S.getSingleClause<OMPNowaitClause>()) { |
4128 | // Emit implicit barrier to synchronize threads and avoid data races on |
4129 | // initialization of firstprivate variables. |
4130 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
4131 | OMPD_unknown); |
4132 | } |
4133 | } |
4134 | |
4135 | void CodeGenFunction::EmitOMPSectionsDirective(const OMPSectionsDirective &S) { |
4136 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4137 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4138 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4139 | using BodyGenCallbackTy = llvm::OpenMPIRBuilder::StorableBodyGenCallbackTy; |
4140 | |
4141 | auto FiniCB = [this](InsertPointTy IP) { |
4142 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4143 | }; |
4144 | |
4145 | const CapturedStmt *ICS = S.getInnermostCapturedStmt(); |
4146 | const Stmt *CapturedStmt = S.getInnermostCapturedStmt()->getCapturedStmt(); |
4147 | const auto *CS = dyn_cast<CompoundStmt>(Val: CapturedStmt); |
4148 | llvm::SmallVector<BodyGenCallbackTy, 4> SectionCBVector; |
4149 | if (CS) { |
4150 | for (const Stmt *SubStmt : CS->children()) { |
4151 | auto SectionCB = [this, SubStmt](InsertPointTy AllocaIP, |
4152 | InsertPointTy CodeGenIP) { |
4153 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4154 | *this, SubStmt, AllocaIP, CodeGenIP, "section" ); |
4155 | }; |
4156 | SectionCBVector.push_back(SectionCB); |
4157 | } |
4158 | } else { |
4159 | auto SectionCB = [this, CapturedStmt](InsertPointTy AllocaIP, |
4160 | InsertPointTy CodeGenIP) { |
4161 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4162 | CGF&: *this, RegionBodyStmt: CapturedStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4163 | }; |
4164 | SectionCBVector.push_back(Elt: SectionCB); |
4165 | } |
4166 | |
4167 | // Privatization callback that performs appropriate action for |
4168 | // shared/private/firstprivate/lastprivate/copyin/... variables. |
4169 | // |
4170 | // TODO: This defaults to shared right now. |
4171 | auto PrivCB = [](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, |
4172 | llvm::Value &, llvm::Value &Val, llvm::Value *&ReplVal) { |
4173 | // The next line is appropriate only for variables (Val) with the |
4174 | // data-sharing attribute "shared". |
4175 | ReplVal = &Val; |
4176 | |
4177 | return CodeGenIP; |
4178 | }; |
4179 | |
4180 | CGCapturedStmtInfo CGSI(*ICS, CR_OpenMP); |
4181 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(*this, &CGSI); |
4182 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP( |
4183 | AllocaInsertPt->getParent(), AllocaInsertPt->getIterator()); |
4184 | Builder.restoreIP(IP: OMPBuilder.createSections( |
4185 | Loc: Builder, AllocaIP, SectionCBs: SectionCBVector, PrivCB, FiniCB, IsCancellable: S.hasCancel(), |
4186 | IsNowait: S.getSingleClause<OMPNowaitClause>())); |
4187 | return; |
4188 | } |
4189 | { |
4190 | auto LPCRegion = |
4191 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4192 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4193 | EmitSections(S); |
4194 | } |
4195 | // Emit an implicit barrier at the end. |
4196 | if (!S.getSingleClause<OMPNowaitClause>()) { |
4197 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), |
4198 | OMPD_sections); |
4199 | } |
4200 | // Check for outer lastprivate conditional update. |
4201 | checkForLastprivateConditionalUpdate(*this, S); |
4202 | } |
4203 | |
4204 | void CodeGenFunction::EmitOMPSectionDirective(const OMPSectionDirective &S) { |
4205 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4206 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4207 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4208 | |
4209 | const Stmt *SectionRegionBodyStmt = S.getAssociatedStmt(); |
4210 | auto FiniCB = [this](InsertPointTy IP) { |
4211 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4212 | }; |
4213 | |
4214 | auto BodyGenCB = [SectionRegionBodyStmt, this](InsertPointTy AllocaIP, |
4215 | InsertPointTy CodeGenIP) { |
4216 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4217 | CGF&: *this, RegionBodyStmt: SectionRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "section" ); |
4218 | }; |
4219 | |
4220 | LexicalScope Scope(*this, S.getSourceRange()); |
4221 | EmitStopPoint(&S); |
4222 | Builder.restoreIP(IP: OMPBuilder.createSection(Loc: Builder, BodyGenCB, FiniCB)); |
4223 | |
4224 | return; |
4225 | } |
4226 | LexicalScope Scope(*this, S.getSourceRange()); |
4227 | EmitStopPoint(&S); |
4228 | EmitStmt(S: S.getAssociatedStmt()); |
4229 | } |
4230 | |
4231 | void CodeGenFunction::EmitOMPSingleDirective(const OMPSingleDirective &S) { |
4232 | llvm::SmallVector<const Expr *, 8> CopyprivateVars; |
4233 | llvm::SmallVector<const Expr *, 8> DestExprs; |
4234 | llvm::SmallVector<const Expr *, 8> SrcExprs; |
4235 | llvm::SmallVector<const Expr *, 8> AssignmentOps; |
4236 | // Check if there are any 'copyprivate' clauses associated with this |
4237 | // 'single' construct. |
4238 | // Build a list of copyprivate variables along with helper expressions |
4239 | // (<source>, <destination>, <destination>=<source> expressions) |
4240 | for (const auto *C : S.getClausesOfKind<OMPCopyprivateClause>()) { |
4241 | CopyprivateVars.append(C->varlists().begin(), C->varlists().end()); |
4242 | DestExprs.append(C->destination_exprs().begin(), |
4243 | C->destination_exprs().end()); |
4244 | SrcExprs.append(C->source_exprs().begin(), C->source_exprs().end()); |
4245 | AssignmentOps.append(C->assignment_ops().begin(), |
4246 | C->assignment_ops().end()); |
4247 | } |
4248 | // Emit code for 'single' region along with 'copyprivate' clauses |
4249 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4250 | Action.Enter(CGF); |
4251 | OMPPrivateScope SingleScope(CGF); |
4252 | (void)CGF.EmitOMPFirstprivateClause(S, SingleScope); |
4253 | CGF.EmitOMPPrivateClause(S, SingleScope); |
4254 | (void)SingleScope.Privatize(); |
4255 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
4256 | }; |
4257 | { |
4258 | auto LPCRegion = |
4259 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4260 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
4261 | CGM.getOpenMPRuntime().emitSingleRegion(CGF&: *this, SingleOpGen: CodeGen, Loc: S.getBeginLoc(), |
4262 | CopyprivateVars, DestExprs, |
4263 | SrcExprs, AssignmentOps); |
4264 | } |
4265 | // Emit an implicit barrier at the end (to avoid data race on firstprivate |
4266 | // init or if no 'nowait' clause was specified and no 'copyprivate' clause). |
4267 | if (!S.getSingleClause<OMPNowaitClause>() && CopyprivateVars.empty()) { |
4268 | CGM.getOpenMPRuntime().emitBarrierCall( |
4269 | *this, S.getBeginLoc(), |
4270 | S.getSingleClause<OMPNowaitClause>() ? OMPD_unknown : OMPD_single); |
4271 | } |
4272 | // Check for outer lastprivate conditional update. |
4273 | checkForLastprivateConditionalUpdate(*this, S); |
4274 | } |
4275 | |
4276 | static void emitMaster(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4277 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4278 | Action.Enter(CGF); |
4279 | CGF.EmitStmt(S: S.getRawStmt()); |
4280 | }; |
4281 | CGF.CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
4282 | } |
4283 | |
4284 | void CodeGenFunction::EmitOMPMasterDirective(const OMPMasterDirective &S) { |
4285 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4286 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4287 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4288 | |
4289 | const Stmt *MasterRegionBodyStmt = S.getAssociatedStmt(); |
4290 | |
4291 | auto FiniCB = [this](InsertPointTy IP) { |
4292 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4293 | }; |
4294 | |
4295 | auto BodyGenCB = [MasterRegionBodyStmt, this](InsertPointTy AllocaIP, |
4296 | InsertPointTy CodeGenIP) { |
4297 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4298 | CGF&: *this, RegionBodyStmt: MasterRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "master" ); |
4299 | }; |
4300 | |
4301 | LexicalScope Scope(*this, S.getSourceRange()); |
4302 | EmitStopPoint(&S); |
4303 | Builder.restoreIP(IP: OMPBuilder.createMaster(Loc: Builder, BodyGenCB, FiniCB)); |
4304 | |
4305 | return; |
4306 | } |
4307 | LexicalScope Scope(*this, S.getSourceRange()); |
4308 | EmitStopPoint(&S); |
4309 | emitMaster(*this, S); |
4310 | } |
4311 | |
4312 | static void emitMasked(CodeGenFunction &CGF, const OMPExecutableDirective &S) { |
4313 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4314 | Action.Enter(CGF); |
4315 | CGF.EmitStmt(S: S.getRawStmt()); |
4316 | }; |
4317 | Expr *Filter = nullptr; |
4318 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4319 | Filter = FilterClause->getThreadID(); |
4320 | CGF.CGM.getOpenMPRuntime().emitMaskedRegion(CGF, MaskedOpGen: CodeGen, Loc: S.getBeginLoc(), |
4321 | Filter); |
4322 | } |
4323 | |
4324 | void CodeGenFunction::EmitOMPMaskedDirective(const OMPMaskedDirective &S) { |
4325 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4326 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4327 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4328 | |
4329 | const Stmt *MaskedRegionBodyStmt = S.getAssociatedStmt(); |
4330 | const Expr *Filter = nullptr; |
4331 | if (const auto *FilterClause = S.getSingleClause<OMPFilterClause>()) |
4332 | Filter = FilterClause->getThreadID(); |
4333 | llvm::Value *FilterVal = Filter |
4334 | ? EmitScalarExpr(E: Filter, IgnoreResultAssign: CGM.Int32Ty) |
4335 | : llvm::ConstantInt::get(Ty: CGM.Int32Ty, /*V=*/0); |
4336 | |
4337 | auto FiniCB = [this](InsertPointTy IP) { |
4338 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4339 | }; |
4340 | |
4341 | auto BodyGenCB = [MaskedRegionBodyStmt, this](InsertPointTy AllocaIP, |
4342 | InsertPointTy CodeGenIP) { |
4343 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4344 | CGF&: *this, RegionBodyStmt: MaskedRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "masked" ); |
4345 | }; |
4346 | |
4347 | LexicalScope Scope(*this, S.getSourceRange()); |
4348 | EmitStopPoint(&S); |
4349 | Builder.restoreIP( |
4350 | IP: OMPBuilder.createMasked(Loc: Builder, BodyGenCB, FiniCB, Filter: FilterVal)); |
4351 | |
4352 | return; |
4353 | } |
4354 | LexicalScope Scope(*this, S.getSourceRange()); |
4355 | EmitStopPoint(&S); |
4356 | emitMasked(*this, S); |
4357 | } |
4358 | |
4359 | void CodeGenFunction::EmitOMPCriticalDirective(const OMPCriticalDirective &S) { |
4360 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
4361 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
4362 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
4363 | |
4364 | const Stmt *CriticalRegionBodyStmt = S.getAssociatedStmt(); |
4365 | const Expr *Hint = nullptr; |
4366 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4367 | Hint = HintClause->getHint(); |
4368 | |
4369 | // TODO: This is slightly different from what's currently being done in |
4370 | // clang. Fix the Int32Ty to IntPtrTy (pointer width size) when everything |
4371 | // about typing is final. |
4372 | llvm::Value *HintInst = nullptr; |
4373 | if (Hint) |
4374 | HintInst = |
4375 | Builder.CreateIntCast(V: EmitScalarExpr(E: Hint), DestTy: CGM.Int32Ty, isSigned: false); |
4376 | |
4377 | auto FiniCB = [this](InsertPointTy IP) { |
4378 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
4379 | }; |
4380 | |
4381 | auto BodyGenCB = [CriticalRegionBodyStmt, this](InsertPointTy AllocaIP, |
4382 | InsertPointTy CodeGenIP) { |
4383 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
4384 | CGF&: *this, RegionBodyStmt: CriticalRegionBodyStmt, AllocaIP, CodeGenIP, RegionName: "critical" ); |
4385 | }; |
4386 | |
4387 | LexicalScope Scope(*this, S.getSourceRange()); |
4388 | EmitStopPoint(&S); |
4389 | Builder.restoreIP(IP: OMPBuilder.createCritical( |
4390 | Loc: Builder, BodyGenCB, FiniCB, CriticalName: S.getDirectiveName().getAsString(), |
4391 | HintInst)); |
4392 | |
4393 | return; |
4394 | } |
4395 | |
4396 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4397 | Action.Enter(CGF); |
4398 | CGF.EmitStmt(S: S.getAssociatedStmt()); |
4399 | }; |
4400 | const Expr *Hint = nullptr; |
4401 | if (const auto *HintClause = S.getSingleClause<OMPHintClause>()) |
4402 | Hint = HintClause->getHint(); |
4403 | LexicalScope Scope(*this, S.getSourceRange()); |
4404 | EmitStopPoint(&S); |
4405 | CGM.getOpenMPRuntime().emitCriticalRegion(CGF&: *this, |
4406 | CriticalName: S.getDirectiveName().getAsString(), |
4407 | CriticalOpGen: CodeGen, Loc: S.getBeginLoc(), Hint); |
4408 | } |
4409 | |
4410 | void CodeGenFunction::EmitOMPParallelForDirective( |
4411 | const OMPParallelForDirective &S) { |
4412 | // Emit directive as a combined directive that consists of two implicit |
4413 | // directives: 'parallel' with 'for' directive. |
4414 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4415 | Action.Enter(CGF); |
4416 | emitOMPCopyinClause(CGF, S); |
4417 | (void)emitWorksharingDirective(CGF, S, S.hasCancel()); |
4418 | }; |
4419 | { |
4420 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4421 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4422 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4423 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4424 | OMPLoopScope LoopScope(CGF, S); |
4425 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4426 | }; |
4427 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4428 | [](const OMPReductionClause *C) { |
4429 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4430 | }); |
4431 | if (IsInscan) |
4432 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4433 | auto LPCRegion = |
4434 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4435 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
4436 | emitEmptyBoundParameters); |
4437 | if (IsInscan) |
4438 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4439 | } |
4440 | // Check for outer lastprivate conditional update. |
4441 | checkForLastprivateConditionalUpdate(*this, S); |
4442 | } |
4443 | |
4444 | void CodeGenFunction::EmitOMPParallelForSimdDirective( |
4445 | const OMPParallelForSimdDirective &S) { |
4446 | // Emit directive as a combined directive that consists of two implicit |
4447 | // directives: 'parallel' with 'for' directive. |
4448 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4449 | Action.Enter(CGF); |
4450 | emitOMPCopyinClause(CGF, S); |
4451 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
4452 | }; |
4453 | { |
4454 | const auto &&NumIteratorsGen = [&S](CodeGenFunction &CGF) { |
4455 | CodeGenFunction::OMPLocalDeclMapRAII Scope(CGF); |
4456 | CGCapturedStmtInfo CGSI(CR_OpenMP); |
4457 | CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGSI); |
4458 | OMPLoopScope LoopScope(CGF, S); |
4459 | return CGF.EmitScalarExpr(S.getNumIterations()); |
4460 | }; |
4461 | bool IsInscan = llvm::any_of(S.getClausesOfKind<OMPReductionClause>(), |
4462 | [](const OMPReductionClause *C) { |
4463 | return C->getModifier() == OMPC_REDUCTION_inscan; |
4464 | }); |
4465 | if (IsInscan) |
4466 | emitScanBasedDirectiveDecls(*this, S, NumIteratorsGen); |
4467 | auto LPCRegion = |
4468 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4469 | emitCommonOMPParallelDirective(*this, S, OMPD_for_simd, CodeGen, |
4470 | emitEmptyBoundParameters); |
4471 | if (IsInscan) |
4472 | emitScanBasedDirectiveFinals(*this, S, NumIteratorsGen); |
4473 | } |
4474 | // Check for outer lastprivate conditional update. |
4475 | checkForLastprivateConditionalUpdate(*this, S); |
4476 | } |
4477 | |
4478 | void CodeGenFunction::EmitOMPParallelMasterDirective( |
4479 | const OMPParallelMasterDirective &S) { |
4480 | // Emit directive as a combined directive that consists of two implicit |
4481 | // directives: 'parallel' with 'master' directive. |
4482 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4483 | Action.Enter(CGF); |
4484 | OMPPrivateScope PrivateScope(CGF); |
4485 | emitOMPCopyinClause(CGF, S); |
4486 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4487 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4488 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4489 | (void)PrivateScope.Privatize(); |
4490 | emitMaster(CGF, S); |
4491 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4492 | }; |
4493 | { |
4494 | auto LPCRegion = |
4495 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4496 | emitCommonOMPParallelDirective(*this, S, OMPD_master, CodeGen, |
4497 | emitEmptyBoundParameters); |
4498 | emitPostUpdateForReductionClause(*this, S, |
4499 | [](CodeGenFunction &) { return nullptr; }); |
4500 | } |
4501 | // Check for outer lastprivate conditional update. |
4502 | checkForLastprivateConditionalUpdate(*this, S); |
4503 | } |
4504 | |
4505 | void CodeGenFunction::EmitOMPParallelMaskedDirective( |
4506 | const OMPParallelMaskedDirective &S) { |
4507 | // Emit directive as a combined directive that consists of two implicit |
4508 | // directives: 'parallel' with 'masked' directive. |
4509 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4510 | Action.Enter(CGF); |
4511 | OMPPrivateScope PrivateScope(CGF); |
4512 | emitOMPCopyinClause(CGF, S); |
4513 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
4514 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
4515 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
4516 | (void)PrivateScope.Privatize(); |
4517 | emitMasked(CGF, S); |
4518 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
4519 | }; |
4520 | { |
4521 | auto LPCRegion = |
4522 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4523 | emitCommonOMPParallelDirective(*this, S, OMPD_masked, CodeGen, |
4524 | emitEmptyBoundParameters); |
4525 | emitPostUpdateForReductionClause(*this, S, |
4526 | [](CodeGenFunction &) { return nullptr; }); |
4527 | } |
4528 | // Check for outer lastprivate conditional update. |
4529 | checkForLastprivateConditionalUpdate(*this, S); |
4530 | } |
4531 | |
4532 | void CodeGenFunction::EmitOMPParallelSectionsDirective( |
4533 | const OMPParallelSectionsDirective &S) { |
4534 | // Emit directive as a combined directive that consists of two implicit |
4535 | // directives: 'parallel' with 'sections' directive. |
4536 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
4537 | Action.Enter(CGF); |
4538 | emitOMPCopyinClause(CGF, S); |
4539 | CGF.EmitSections(S); |
4540 | }; |
4541 | { |
4542 | auto LPCRegion = |
4543 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
4544 | emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen, |
4545 | emitEmptyBoundParameters); |
4546 | } |
4547 | // Check for outer lastprivate conditional update. |
4548 | checkForLastprivateConditionalUpdate(*this, S); |
4549 | } |
4550 | |
4551 | namespace { |
4552 | /// Get the list of variables declared in the context of the untied tasks. |
4553 | class CheckVarsEscapingUntiedTaskDeclContext final |
4554 | : public ConstStmtVisitor<CheckVarsEscapingUntiedTaskDeclContext> { |
4555 | llvm::SmallVector<const VarDecl *, 4> PrivateDecls; |
4556 | |
4557 | public: |
4558 | explicit CheckVarsEscapingUntiedTaskDeclContext() = default; |
4559 | virtual ~CheckVarsEscapingUntiedTaskDeclContext() = default; |
4560 | void VisitDeclStmt(const DeclStmt *S) { |
4561 | if (!S) |
4562 | return; |
4563 | // Need to privatize only local vars, static locals can be processed as is. |
4564 | for (const Decl *D : S->decls()) { |
4565 | if (const auto *VD = dyn_cast_or_null<VarDecl>(Val: D)) |
4566 | if (VD->hasLocalStorage()) |
4567 | PrivateDecls.push_back(Elt: VD); |
4568 | } |
4569 | } |
4570 | void VisitOMPExecutableDirective(const OMPExecutableDirective *) {} |
4571 | void VisitCapturedStmt(const CapturedStmt *) {} |
4572 | void VisitLambdaExpr(const LambdaExpr *) {} |
4573 | void VisitBlockExpr(const BlockExpr *) {} |
4574 | void VisitStmt(const Stmt *S) { |
4575 | if (!S) |
4576 | return; |
4577 | for (const Stmt *Child : S->children()) |
4578 | if (Child) |
4579 | Visit(Child); |
4580 | } |
4581 | |
4582 | /// Swaps list of vars with the provided one. |
4583 | ArrayRef<const VarDecl *> getPrivateDecls() const { return PrivateDecls; } |
4584 | }; |
4585 | } // anonymous namespace |
4586 | |
4587 | static void buildDependences(const OMPExecutableDirective &S, |
4588 | OMPTaskDataTy &Data) { |
4589 | |
4590 | // First look for 'omp_all_memory' and add this first. |
4591 | bool OmpAllMemory = false; |
4592 | if (llvm::any_of( |
4593 | Range: S.getClausesOfKind<OMPDependClause>(), P: [](const OMPDependClause *C) { |
4594 | return C->getDependencyKind() == OMPC_DEPEND_outallmemory || |
4595 | C->getDependencyKind() == OMPC_DEPEND_inoutallmemory; |
4596 | })) { |
4597 | OmpAllMemory = true; |
4598 | // Since both OMPC_DEPEND_outallmemory and OMPC_DEPEND_inoutallmemory are |
4599 | // equivalent to the runtime, always use OMPC_DEPEND_outallmemory to |
4600 | // simplify. |
4601 | OMPTaskDataTy::DependData &DD = |
4602 | Data.Dependences.emplace_back(Args: OMPC_DEPEND_outallmemory, |
4603 | /*IteratorExpr=*/Args: nullptr); |
4604 | // Add a nullptr Expr to simplify the codegen in emitDependData. |
4605 | DD.DepExprs.push_back(Elt: nullptr); |
4606 | } |
4607 | // Add remaining dependences skipping any 'out' or 'inout' if they are |
4608 | // overridden by 'omp_all_memory'. |
4609 | for (const auto *C : S.getClausesOfKind<OMPDependClause>()) { |
4610 | OpenMPDependClauseKind Kind = C->getDependencyKind(); |
4611 | if (Kind == OMPC_DEPEND_outallmemory || Kind == OMPC_DEPEND_inoutallmemory) |
4612 | continue; |
4613 | if (OmpAllMemory && (Kind == OMPC_DEPEND_out || Kind == OMPC_DEPEND_inout)) |
4614 | continue; |
4615 | OMPTaskDataTy::DependData &DD = |
4616 | Data.Dependences.emplace_back(Args: C->getDependencyKind(), Args: C->getModifier()); |
4617 | DD.DepExprs.append(C->varlist_begin(), C->varlist_end()); |
4618 | } |
4619 | } |
4620 | |
4621 | void CodeGenFunction::EmitOMPTaskBasedDirective( |
4622 | const OMPExecutableDirective &S, const OpenMPDirectiveKind CapturedRegion, |
4623 | const RegionCodeGenTy &BodyGen, const TaskGenTy &TaskGen, |
4624 | OMPTaskDataTy &Data) { |
4625 | // Emit outlined function for task construct. |
4626 | const CapturedStmt *CS = S.getCapturedStmt(CapturedRegion); |
4627 | auto I = CS->getCapturedDecl()->param_begin(); |
4628 | auto PartId = std::next(I); |
4629 | auto TaskT = std::next(I, 4); |
4630 | // Check if the task is final |
4631 | if (const auto *Clause = S.getSingleClause<OMPFinalClause>()) { |
4632 | // If the condition constant folds and can be elided, try to avoid emitting |
4633 | // the condition and the dead arm of the if/else. |
4634 | const Expr *Cond = Clause->getCondition(); |
4635 | bool CondConstant; |
4636 | if (ConstantFoldsToSimpleInteger(Cond, Result&: CondConstant)) |
4637 | Data.Final.setInt(CondConstant); |
4638 | else |
4639 | Data.Final.setPointer(EvaluateExprAsBool(E: Cond)); |
4640 | } else { |
4641 | // By default the task is not final. |
4642 | Data.Final.setInt(/*IntVal=*/false); |
4643 | } |
4644 | // Check if the task has 'priority' clause. |
4645 | if (const auto *Clause = S.getSingleClause<OMPPriorityClause>()) { |
4646 | const Expr *Prio = Clause->getPriority(); |
4647 | Data.Priority.setInt(/*IntVal=*/true); |
4648 | Data.Priority.setPointer(EmitScalarConversion( |
4649 | Src: EmitScalarExpr(E: Prio), SrcTy: Prio->getType(), |
4650 | DstTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1), |
4651 | Loc: Prio->getExprLoc())); |
4652 | } |
4653 | // The first function argument for tasks is a thread id, the second one is a |
4654 | // part id (0 for tied tasks, >=0 for untied task). |
4655 | llvm::DenseSet<const VarDecl *> EmittedAsPrivate; |
4656 | // Get list of private variables. |
4657 | for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) { |
4658 | auto IRef = C->varlist_begin(); |
4659 | for (const Expr *IInit : C->private_copies()) { |
4660 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4661 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4662 | Data.PrivateVars.push_back(Elt: *IRef); |
4663 | Data.PrivateCopies.push_back(Elt: IInit); |
4664 | } |
4665 | ++IRef; |
4666 | } |
4667 | } |
4668 | EmittedAsPrivate.clear(); |
4669 | // Get list of firstprivate variables. |
4670 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
4671 | auto IRef = C->varlist_begin(); |
4672 | auto IElemInitRef = C->inits().begin(); |
4673 | for (const Expr *IInit : C->private_copies()) { |
4674 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4675 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4676 | Data.FirstprivateVars.push_back(Elt: *IRef); |
4677 | Data.FirstprivateCopies.push_back(Elt: IInit); |
4678 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
4679 | } |
4680 | ++IRef; |
4681 | ++IElemInitRef; |
4682 | } |
4683 | } |
4684 | // Get list of lastprivate variables (for taskloops). |
4685 | llvm::MapVector<const VarDecl *, const DeclRefExpr *> LastprivateDstsOrigs; |
4686 | for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) { |
4687 | auto IRef = C->varlist_begin(); |
4688 | auto ID = C->destination_exprs().begin(); |
4689 | for (const Expr *IInit : C->private_copies()) { |
4690 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(*IRef)->getDecl()); |
4691 | if (EmittedAsPrivate.insert(OrigVD->getCanonicalDecl()).second) { |
4692 | Data.LastprivateVars.push_back(Elt: *IRef); |
4693 | Data.LastprivateCopies.push_back(Elt: IInit); |
4694 | } |
4695 | LastprivateDstsOrigs.insert( |
4696 | std::make_pair(cast<VarDecl>(Val: cast<DeclRefExpr>(Val: *ID)->getDecl()), |
4697 | cast<DeclRefExpr>(*IRef))); |
4698 | ++IRef; |
4699 | ++ID; |
4700 | } |
4701 | } |
4702 | SmallVector<const Expr *, 4> LHSs; |
4703 | SmallVector<const Expr *, 4> RHSs; |
4704 | for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) { |
4705 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
4706 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
4707 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
4708 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
4709 | in_end: C->reduction_ops().end()); |
4710 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
4711 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
4712 | } |
4713 | Data.Reductions = CGM.getOpenMPRuntime().emitTaskReductionInit( |
4714 | CGF&: *this, Loc: S.getBeginLoc(), LHSExprs: LHSs, RHSExprs: RHSs, Data); |
4715 | // Build list of dependences. |
4716 | buildDependences(S, Data); |
4717 | // Get list of local vars for untied tasks. |
4718 | if (!Data.Tied) { |
4719 | CheckVarsEscapingUntiedTaskDeclContext Checker; |
4720 | Checker.Visit(S.getInnermostCapturedStmt()->getCapturedStmt()); |
4721 | Data.PrivateLocals.append(in_start: Checker.getPrivateDecls().begin(), |
4722 | in_end: Checker.getPrivateDecls().end()); |
4723 | } |
4724 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, &LastprivateDstsOrigs, |
4725 | CapturedRegion](CodeGenFunction &CGF, |
4726 | PrePostActionTy &Action) { |
4727 | llvm::MapVector<CanonicalDeclPtr<const VarDecl>, |
4728 | std::pair<Address, Address>> |
4729 | UntiedLocalVars; |
4730 | // Set proper addresses for generated private copies. |
4731 | OMPPrivateScope Scope(CGF); |
4732 | // Generate debug info for variables present in shared clause. |
4733 | if (auto *DI = CGF.getDebugInfo()) { |
4734 | llvm::SmallDenseMap<const VarDecl *, FieldDecl *> CaptureFields = |
4735 | CGF.CapturedStmtInfo->getCaptureFields(); |
4736 | llvm::Value *ContextValue = CGF.CapturedStmtInfo->getContextValue(); |
4737 | if (CaptureFields.size() && ContextValue) { |
4738 | unsigned CharWidth = CGF.getContext().getCharWidth(); |
4739 | // The shared variables are packed together as members of structure. |
4740 | // So the address of each shared variable can be computed by adding |
4741 | // offset of it (within record) to the base address of record. For each |
4742 | // shared variable, debug intrinsic llvm.dbg.declare is generated with |
4743 | // appropriate expressions (DIExpression). |
4744 | // Ex: |
4745 | // %12 = load %struct.anon*, %struct.anon** %__context.addr.i |
4746 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4747 | // metadata !svar1, |
4748 | // metadata !DIExpression(DW_OP_deref)) |
4749 | // call void @llvm.dbg.declare(metadata %struct.anon* %12, |
4750 | // metadata !svar2, |
4751 | // metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) |
4752 | for (auto It = CaptureFields.begin(); It != CaptureFields.end(); ++It) { |
4753 | const VarDecl *SharedVar = It->first; |
4754 | RecordDecl *CaptureRecord = It->second->getParent(); |
4755 | const ASTRecordLayout &Layout = |
4756 | CGF.getContext().getASTRecordLayout(D: CaptureRecord); |
4757 | unsigned Offset = |
4758 | Layout.getFieldOffset(FieldNo: It->second->getFieldIndex()) / CharWidth; |
4759 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4760 | (void)DI->EmitDeclareOfAutoVariable(Decl: SharedVar, AI: ContextValue, |
4761 | Builder&: CGF.Builder, UsePointerValue: false); |
4762 | // Get the call dbg.declare instruction we just created and update |
4763 | // its DIExpression to add offset to base address. |
4764 | auto UpdateExpr = [](llvm::LLVMContext &Ctx, auto *Declare, |
4765 | unsigned Offset) { |
4766 | SmallVector<uint64_t, 8> Ops; |
4767 | // Add offset to the base address if non zero. |
4768 | if (Offset) { |
4769 | Ops.push_back(Elt: llvm::dwarf::DW_OP_plus_uconst); |
4770 | Ops.push_back(Elt: Offset); |
4771 | } |
4772 | Ops.push_back(Elt: llvm::dwarf::DW_OP_deref); |
4773 | Declare->setExpression(llvm::DIExpression::get(Context&: Ctx, Elements: Ops)); |
4774 | }; |
4775 | llvm::Instruction &Last = CGF.Builder.GetInsertBlock()->back(); |
4776 | if (auto DDI = dyn_cast<llvm::DbgVariableIntrinsic>(Val: &Last)) |
4777 | UpdateExpr(DDI->getContext(), DDI, Offset); |
4778 | // If we're emitting using the new debug info format into a block |
4779 | // without a terminator, the record will be "trailing". |
4780 | assert(!Last.isTerminator() && "unexpected terminator" ); |
4781 | if (auto *Marker = |
4782 | CGF.Builder.GetInsertBlock()->getTrailingDbgRecords()) { |
4783 | for (llvm::DbgVariableRecord &DVR : llvm::reverse( |
4784 | C: llvm::filterDbgVars(R: Marker->getDbgRecordRange()))) { |
4785 | UpdateExpr(Last.getContext(), &DVR, Offset); |
4786 | break; |
4787 | } |
4788 | } |
4789 | } |
4790 | } |
4791 | } |
4792 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> FirstprivatePtrs; |
4793 | if (!Data.PrivateVars.empty() || !Data.FirstprivateVars.empty() || |
4794 | !Data.LastprivateVars.empty() || !Data.PrivateLocals.empty()) { |
4795 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
4796 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
4797 | Addr: CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
4798 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
4799 | CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
4800 | // Map privates. |
4801 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
4802 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
4803 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
4804 | CallArgs.push_back(Elt: PrivatesPtr); |
4805 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
4806 | for (const Expr *E : Data.PrivateVars) { |
4807 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4808 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
4809 | T: CGF.getContext().getPointerType(T: E->getType()), Name: ".priv.ptr.addr" ); |
4810 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4811 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4812 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4813 | } |
4814 | for (const Expr *E : Data.FirstprivateVars) { |
4815 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4816 | RawAddress PrivatePtr = |
4817 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
4818 | Name: ".firstpriv.ptr.addr" ); |
4819 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4820 | FirstprivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4821 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4822 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4823 | } |
4824 | for (const Expr *E : Data.LastprivateVars) { |
4825 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
4826 | RawAddress PrivatePtr = |
4827 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
4828 | Name: ".lastpriv.ptr.addr" ); |
4829 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
4830 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4831 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4832 | } |
4833 | for (const VarDecl *VD : Data.PrivateLocals) { |
4834 | QualType Ty = VD->getType().getNonReferenceType(); |
4835 | if (VD->getType()->isLValueReferenceType()) |
4836 | Ty = CGF.getContext().getPointerType(T: Ty); |
4837 | if (isAllocatableDecl(VD)) |
4838 | Ty = CGF.getContext().getPointerType(T: Ty); |
4839 | RawAddress PrivatePtr = CGF.CreateMemTemp( |
4840 | T: CGF.getContext().getPointerType(T: Ty), Name: ".local.ptr.addr" ); |
4841 | auto Result = UntiedLocalVars.insert( |
4842 | KV: std::make_pair(x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid()))); |
4843 | // If key exists update in place. |
4844 | if (Result.second == false) |
4845 | *Result.first = std::make_pair( |
4846 | x&: VD, y: std::make_pair(x&: PrivatePtr, y: Address::invalid())); |
4847 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
4848 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
4849 | } |
4850 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
4851 | Params: ParamTypes, /*isVarArg=*/false); |
4852 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
4853 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
4854 | for (const auto &Pair : LastprivateDstsOrigs) { |
4855 | const auto *OrigVD = cast<VarDecl>(Val: Pair.second->getDecl()); |
4856 | DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(OrigVD), |
4857 | /*RefersToEnclosingVariableOrCapture=*/ |
4858 | CGF.CapturedStmtInfo->lookup(VD: OrigVD) != nullptr, |
4859 | Pair.second->getType(), VK_LValue, |
4860 | Pair.second->getExprLoc()); |
4861 | Scope.addPrivate(LocalVD: Pair.first, Addr: CGF.EmitLValue(&DRE).getAddress(CGF)); |
4862 | } |
4863 | for (const auto &Pair : PrivatePtrs) { |
4864 | Address Replacement = Address( |
4865 | CGF.Builder.CreateLoad(Addr: Pair.second), |
4866 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
4867 | CGF.getContext().getDeclAlign(Pair.first)); |
4868 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
4869 | if (auto *DI = CGF.getDebugInfo()) |
4870 | if (CGF.CGM.getCodeGenOpts().hasReducedDebugInfo()) |
4871 | (void)DI->EmitDeclareOfAutoVariable( |
4872 | Decl: Pair.first, AI: Pair.second.getBasePointer(), Builder&: CGF.Builder, |
4873 | /*UsePointerValue*/ true); |
4874 | } |
4875 | // Adjust mapping for internal locals by mapping actual memory instead of |
4876 | // a pointer to this memory. |
4877 | for (auto &Pair : UntiedLocalVars) { |
4878 | QualType VDType = Pair.first->getType().getNonReferenceType(); |
4879 | if (Pair.first->getType()->isLValueReferenceType()) |
4880 | VDType = CGF.getContext().getPointerType(T: VDType); |
4881 | if (isAllocatableDecl(VD: Pair.first)) { |
4882 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
4883 | Address Replacement( |
4884 | Ptr, |
4885 | CGF.ConvertTypeForMem(T: CGF.getContext().getPointerType(T: VDType)), |
4886 | CGF.getPointerAlign()); |
4887 | Pair.second.first = Replacement; |
4888 | Ptr = CGF.Builder.CreateLoad(Addr: Replacement); |
4889 | Replacement = Address(Ptr, CGF.ConvertTypeForMem(T: VDType), |
4890 | CGF.getContext().getDeclAlign(Pair.first)); |
4891 | Pair.second.second = Replacement; |
4892 | } else { |
4893 | llvm::Value *Ptr = CGF.Builder.CreateLoad(Addr: Pair.second.first); |
4894 | Address Replacement(Ptr, CGF.ConvertTypeForMem(T: VDType), |
4895 | CGF.getContext().getDeclAlign(Pair.first)); |
4896 | Pair.second.first = Replacement; |
4897 | } |
4898 | } |
4899 | } |
4900 | if (Data.Reductions) { |
4901 | OMPPrivateScope FirstprivateScope(CGF); |
4902 | for (const auto &Pair : FirstprivatePtrs) { |
4903 | Address Replacement( |
4904 | CGF.Builder.CreateLoad(Addr: Pair.second), |
4905 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
4906 | CGF.getContext().getDeclAlign(Pair.first)); |
4907 | FirstprivateScope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
4908 | } |
4909 | (void)FirstprivateScope.Privatize(); |
4910 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
4911 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
4912 | Data.ReductionCopies, Data.ReductionOps); |
4913 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
4914 | Addr: CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(i: 9))); |
4915 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
4916 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
4917 | RedCG.emitAggregateType(CGF, N: Cnt); |
4918 | // FIXME: This must removed once the runtime library is fixed. |
4919 | // Emit required threadprivate variables for |
4920 | // initializer/combiner/finalizer. |
4921 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
4922 | RCG&: RedCG, N: Cnt); |
4923 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4924 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
4925 | Replacement = Address( |
4926 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
4927 | SrcTy: CGF.getContext().VoidPtrTy, |
4928 | DstTy: CGF.getContext().getPointerType( |
4929 | T: Data.ReductionCopies[Cnt]->getType()), |
4930 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
4931 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
4932 | Replacement.getAlignment()); |
4933 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
4934 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
4935 | } |
4936 | } |
4937 | // Privatize all private variables except for in_reduction items. |
4938 | (void)Scope.Privatize(); |
4939 | SmallVector<const Expr *, 4> InRedVars; |
4940 | SmallVector<const Expr *, 4> InRedPrivs; |
4941 | SmallVector<const Expr *, 4> InRedOps; |
4942 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
4943 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
4944 | auto IPriv = C->privates().begin(); |
4945 | auto IRed = C->reduction_ops().begin(); |
4946 | auto ITD = C->taskgroup_descriptors().begin(); |
4947 | for (const Expr *Ref : C->varlists()) { |
4948 | InRedVars.emplace_back(Ref); |
4949 | InRedPrivs.emplace_back(*IPriv); |
4950 | InRedOps.emplace_back(*IRed); |
4951 | TaskgroupDescriptors.emplace_back(*ITD); |
4952 | std::advance(IPriv, 1); |
4953 | std::advance(IRed, 1); |
4954 | std::advance(ITD, 1); |
4955 | } |
4956 | } |
4957 | // Privatize in_reduction items here, because taskgroup descriptors must be |
4958 | // privatized earlier. |
4959 | OMPPrivateScope InRedScope(CGF); |
4960 | if (!InRedVars.empty()) { |
4961 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
4962 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
4963 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
4964 | RedCG.emitAggregateType(CGF, N: Cnt); |
4965 | // The taskgroup descriptor variable is always implicit firstprivate and |
4966 | // privatized already during processing of the firstprivates. |
4967 | // FIXME: This must removed once the runtime library is fixed. |
4968 | // Emit required threadprivate variables for |
4969 | // initializer/combiner/finalizer. |
4970 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
4971 | RCG&: RedCG, N: Cnt); |
4972 | llvm::Value *ReductionsPtr; |
4973 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
4974 | ReductionsPtr = CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), |
4975 | Loc: TRExpr->getExprLoc()); |
4976 | } else { |
4977 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
4978 | } |
4979 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
4980 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
4981 | Replacement = Address( |
4982 | CGF.EmitScalarConversion( |
4983 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
4984 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
4985 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
4986 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
4987 | Replacement.getAlignment()); |
4988 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
4989 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
4990 | } |
4991 | } |
4992 | (void)InRedScope.Privatize(); |
4993 | |
4994 | CGOpenMPRuntime::UntiedTaskLocalDeclsRAII (CGF, |
4995 | UntiedLocalVars); |
4996 | Action.Enter(CGF); |
4997 | BodyGen(CGF); |
4998 | }; |
4999 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5000 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, Data.Tied, |
5001 | Data.NumberOfParts); |
5002 | OMPLexicalScope Scope(*this, S, std::nullopt, |
5003 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5004 | !isOpenMPSimdDirective(S.getDirectiveKind())); |
5005 | TaskGen(*this, OutlinedFn, Data); |
5006 | } |
5007 | |
5008 | static ImplicitParamDecl * |
5009 | createImplicitFirstprivateForType(ASTContext &C, OMPTaskDataTy &Data, |
5010 | QualType Ty, CapturedDecl *CD, |
5011 | SourceLocation Loc) { |
5012 | auto *OrigVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
5013 | ImplicitParamKind::Other); |
5014 | auto *OrigRef = DeclRefExpr::Create( |
5015 | C, NestedNameSpecifierLoc(), SourceLocation(), OrigVD, |
5016 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
5017 | auto *PrivateVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, Ty, |
5018 | ImplicitParamKind::Other); |
5019 | auto *PrivateRef = DeclRefExpr::Create( |
5020 | C, NestedNameSpecifierLoc(), SourceLocation(), PrivateVD, |
5021 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, Ty, VK_LValue); |
5022 | QualType ElemType = C.getBaseElementType(QT: Ty); |
5023 | auto *InitVD = ImplicitParamDecl::Create(C, CD, Loc, /*Id=*/nullptr, ElemType, |
5024 | ImplicitParamKind::Other); |
5025 | auto *InitRef = DeclRefExpr::Create( |
5026 | C, NestedNameSpecifierLoc(), SourceLocation(), InitVD, |
5027 | /*RefersToEnclosingVariableOrCapture=*/false, Loc, ElemType, VK_LValue); |
5028 | PrivateVD->setInitStyle(VarDecl::CInit); |
5029 | PrivateVD->setInit(ImplicitCastExpr::Create(Context: C, T: ElemType, Kind: CK_LValueToRValue, |
5030 | Operand: InitRef, /*BasePath=*/nullptr, |
5031 | Cat: VK_PRValue, FPO: FPOptionsOverride())); |
5032 | Data.FirstprivateVars.emplace_back(OrigRef); |
5033 | Data.FirstprivateCopies.emplace_back(PrivateRef); |
5034 | Data.FirstprivateInits.emplace_back(InitRef); |
5035 | return OrigVD; |
5036 | } |
5037 | |
5038 | void CodeGenFunction::EmitOMPTargetTaskBasedDirective( |
5039 | const OMPExecutableDirective &S, const RegionCodeGenTy &BodyGen, |
5040 | OMPTargetDataInfo &InputInfo) { |
5041 | // Emit outlined function for task construct. |
5042 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
5043 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5044 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5045 | auto I = CS->getCapturedDecl()->param_begin(); |
5046 | auto PartId = std::next(x: I); |
5047 | auto TaskT = std::next(x: I, n: 4); |
5048 | OMPTaskDataTy Data; |
5049 | // The task is not final. |
5050 | Data.Final.setInt(/*IntVal=*/false); |
5051 | // Get list of firstprivate variables. |
5052 | for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) { |
5053 | auto IRef = C->varlist_begin(); |
5054 | auto IElemInitRef = C->inits().begin(); |
5055 | for (auto *IInit : C->private_copies()) { |
5056 | Data.FirstprivateVars.push_back(Elt: *IRef); |
5057 | Data.FirstprivateCopies.push_back(Elt: IInit); |
5058 | Data.FirstprivateInits.push_back(Elt: *IElemInitRef); |
5059 | ++IRef; |
5060 | ++IElemInitRef; |
5061 | } |
5062 | } |
5063 | SmallVector<const Expr *, 4> LHSs; |
5064 | SmallVector<const Expr *, 4> RHSs; |
5065 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5066 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
5067 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
5068 | Data.ReductionCopies.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5069 | Data.ReductionOps.append(in_start: C->reduction_ops().begin(), |
5070 | in_end: C->reduction_ops().end()); |
5071 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5072 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5073 | } |
5074 | OMPPrivateScope TargetScope(*this); |
5075 | VarDecl *BPVD = nullptr; |
5076 | VarDecl *PVD = nullptr; |
5077 | VarDecl *SVD = nullptr; |
5078 | VarDecl *MVD = nullptr; |
5079 | if (InputInfo.NumberOfTargetItems > 0) { |
5080 | auto *CD = CapturedDecl::Create( |
5081 | getContext(), getContext().getTranslationUnitDecl(), /*NumParams=*/0); |
5082 | llvm::APInt ArrSize(/*numBits=*/32, InputInfo.NumberOfTargetItems); |
5083 | QualType BaseAndPointerAndMapperType = getContext().getConstantArrayType( |
5084 | EltTy: getContext().VoidPtrTy, ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5085 | /*IndexTypeQuals=*/0); |
5086 | BPVD = createImplicitFirstprivateForType( |
5087 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5088 | PVD = createImplicitFirstprivateForType( |
5089 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5090 | QualType SizesType = getContext().getConstantArrayType( |
5091 | EltTy: getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1), |
5092 | ArySize: ArrSize, SizeExpr: nullptr, ASM: ArraySizeModifier::Normal, |
5093 | /*IndexTypeQuals=*/0); |
5094 | SVD = createImplicitFirstprivateForType(getContext(), Data, SizesType, CD, |
5095 | S.getBeginLoc()); |
5096 | TargetScope.addPrivate(LocalVD: BPVD, Addr: InputInfo.BasePointersArray); |
5097 | TargetScope.addPrivate(LocalVD: PVD, Addr: InputInfo.PointersArray); |
5098 | TargetScope.addPrivate(LocalVD: SVD, Addr: InputInfo.SizesArray); |
5099 | // If there is no user-defined mapper, the mapper array will be nullptr. In |
5100 | // this case, we don't need to privatize it. |
5101 | if (!isa_and_nonnull<llvm::ConstantPointerNull>( |
5102 | Val: InputInfo.MappersArray.emitRawPointer(CGF&: *this))) { |
5103 | MVD = createImplicitFirstprivateForType( |
5104 | getContext(), Data, BaseAndPointerAndMapperType, CD, S.getBeginLoc()); |
5105 | TargetScope.addPrivate(LocalVD: MVD, Addr: InputInfo.MappersArray); |
5106 | } |
5107 | } |
5108 | (void)TargetScope.Privatize(); |
5109 | buildDependences(S, Data); |
5110 | auto &&CodeGen = [&Data, &S, CS, &BodyGen, BPVD, PVD, SVD, MVD, |
5111 | &InputInfo](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5112 | // Set proper addresses for generated private copies. |
5113 | OMPPrivateScope Scope(CGF); |
5114 | if (!Data.FirstprivateVars.empty()) { |
5115 | enum { PrivatesParam = 2, CopyFnParam = 3 }; |
5116 | llvm::Value *CopyFn = CGF.Builder.CreateLoad( |
5117 | Addr: CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(i: CopyFnParam))); |
5118 | llvm::Value *PrivatesPtr = CGF.Builder.CreateLoad(Addr: CGF.GetAddrOfLocalVar( |
5119 | CS->getCapturedDecl()->getParam(i: PrivatesParam))); |
5120 | // Map privates. |
5121 | llvm::SmallVector<std::pair<const VarDecl *, Address>, 16> PrivatePtrs; |
5122 | llvm::SmallVector<llvm::Value *, 16> CallArgs; |
5123 | llvm::SmallVector<llvm::Type *, 4> ParamTypes; |
5124 | CallArgs.push_back(Elt: PrivatesPtr); |
5125 | ParamTypes.push_back(Elt: PrivatesPtr->getType()); |
5126 | for (const Expr *E : Data.FirstprivateVars) { |
5127 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5128 | RawAddress PrivatePtr = |
5129 | CGF.CreateMemTemp(T: CGF.getContext().getPointerType(T: E->getType()), |
5130 | Name: ".firstpriv.ptr.addr" ); |
5131 | PrivatePtrs.emplace_back(Args&: VD, Args&: PrivatePtr); |
5132 | CallArgs.push_back(Elt: PrivatePtr.getPointer()); |
5133 | ParamTypes.push_back(Elt: PrivatePtr.getType()); |
5134 | } |
5135 | auto *CopyFnTy = llvm::FunctionType::get(Result: CGF.Builder.getVoidTy(), |
5136 | Params: ParamTypes, /*isVarArg=*/false); |
5137 | CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall( |
5138 | CGF, Loc: S.getBeginLoc(), OutlinedFn: {CopyFnTy, CopyFn}, Args: CallArgs); |
5139 | for (const auto &Pair : PrivatePtrs) { |
5140 | Address Replacement( |
5141 | CGF.Builder.CreateLoad(Addr: Pair.second), |
5142 | CGF.ConvertTypeForMem(T: Pair.first->getType().getNonReferenceType()), |
5143 | CGF.getContext().getDeclAlign(Pair.first)); |
5144 | Scope.addPrivate(LocalVD: Pair.first, Addr: Replacement); |
5145 | } |
5146 | } |
5147 | CGF.processInReduction(S, Data, CGF, CS, Scope); |
5148 | if (InputInfo.NumberOfTargetItems > 0) { |
5149 | InputInfo.BasePointersArray = CGF.Builder.CreateConstArrayGEP( |
5150 | Addr: CGF.GetAddrOfLocalVar(VD: BPVD), /*Index=*/0); |
5151 | InputInfo.PointersArray = CGF.Builder.CreateConstArrayGEP( |
5152 | Addr: CGF.GetAddrOfLocalVar(VD: PVD), /*Index=*/0); |
5153 | InputInfo.SizesArray = CGF.Builder.CreateConstArrayGEP( |
5154 | Addr: CGF.GetAddrOfLocalVar(VD: SVD), /*Index=*/0); |
5155 | // If MVD is nullptr, the mapper array is not privatized |
5156 | if (MVD) |
5157 | InputInfo.MappersArray = CGF.Builder.CreateConstArrayGEP( |
5158 | Addr: CGF.GetAddrOfLocalVar(VD: MVD), /*Index=*/0); |
5159 | } |
5160 | |
5161 | Action.Enter(CGF); |
5162 | OMPLexicalScope LexScope(CGF, S, OMPD_task, /*EmitPreInitStmt=*/false); |
5163 | auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
5164 | if (CGF.CGM.getLangOpts().OpenMP >= 51 && |
5165 | needsTaskBasedThreadLimit(S.getDirectiveKind()) && TL) { |
5166 | // Emit __kmpc_set_thread_limit() to set the thread_limit for the task |
5167 | // enclosing this target region. This will indirectly set the thread_limit |
5168 | // for every applicable construct within target region. |
5169 | CGF.CGM.getOpenMPRuntime().emitThreadLimitClause( |
5170 | CGF, ThreadLimit: TL->getThreadLimit(), Loc: S.getBeginLoc()); |
5171 | } |
5172 | BodyGen(CGF); |
5173 | }; |
5174 | llvm::Function *OutlinedFn = CGM.getOpenMPRuntime().emitTaskOutlinedFunction( |
5175 | S, *I, *PartId, *TaskT, S.getDirectiveKind(), CodeGen, /*Tied=*/true, |
5176 | Data.NumberOfParts); |
5177 | llvm::APInt TrueOrFalse(32, S.hasClausesOfKind<OMPNowaitClause>() ? 1 : 0); |
5178 | IntegerLiteral IfCond(getContext(), TrueOrFalse, |
5179 | getContext().getIntTypeForBitwidth(DestWidth: 32, /*Signed=*/0), |
5180 | SourceLocation()); |
5181 | CGM.getOpenMPRuntime().emitTaskCall(*this, S.getBeginLoc(), S, OutlinedFn, |
5182 | SharedsTy, CapturedStruct, &IfCond, Data); |
5183 | } |
5184 | |
5185 | void CodeGenFunction::processInReduction(const OMPExecutableDirective &S, |
5186 | OMPTaskDataTy &Data, |
5187 | CodeGenFunction &CGF, |
5188 | const CapturedStmt *CS, |
5189 | OMPPrivateScope &Scope) { |
5190 | if (Data.Reductions) { |
5191 | OpenMPDirectiveKind CapturedRegion = S.getDirectiveKind(); |
5192 | OMPLexicalScope LexScope(CGF, S, CapturedRegion); |
5193 | ReductionCodeGen RedCG(Data.ReductionVars, Data.ReductionVars, |
5194 | Data.ReductionCopies, Data.ReductionOps); |
5195 | llvm::Value *ReductionsPtr = CGF.Builder.CreateLoad( |
5196 | Addr: CGF.GetAddrOfLocalVar(CS->getCapturedDecl()->getParam(i: 4))); |
5197 | for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) { |
5198 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5199 | RedCG.emitAggregateType(CGF, N: Cnt); |
5200 | // FIXME: This must removed once the runtime library is fixed. |
5201 | // Emit required threadprivate variables for |
5202 | // initializer/combiner/finalizer. |
5203 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5204 | RCG&: RedCG, N: Cnt); |
5205 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5206 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5207 | Replacement = Address( |
5208 | CGF.EmitScalarConversion(Src: Replacement.emitRawPointer(CGF), |
5209 | SrcTy: CGF.getContext().VoidPtrTy, |
5210 | DstTy: CGF.getContext().getPointerType( |
5211 | T: Data.ReductionCopies[Cnt]->getType()), |
5212 | Loc: Data.ReductionCopies[Cnt]->getExprLoc()), |
5213 | CGF.ConvertTypeForMem(T: Data.ReductionCopies[Cnt]->getType()), |
5214 | Replacement.getAlignment()); |
5215 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5216 | Scope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5217 | } |
5218 | } |
5219 | (void)Scope.Privatize(); |
5220 | SmallVector<const Expr *, 4> InRedVars; |
5221 | SmallVector<const Expr *, 4> InRedPrivs; |
5222 | SmallVector<const Expr *, 4> InRedOps; |
5223 | SmallVector<const Expr *, 4> TaskgroupDescriptors; |
5224 | for (const auto *C : S.getClausesOfKind<OMPInReductionClause>()) { |
5225 | auto IPriv = C->privates().begin(); |
5226 | auto IRed = C->reduction_ops().begin(); |
5227 | auto ITD = C->taskgroup_descriptors().begin(); |
5228 | for (const Expr *Ref : C->varlists()) { |
5229 | InRedVars.emplace_back(Ref); |
5230 | InRedPrivs.emplace_back(*IPriv); |
5231 | InRedOps.emplace_back(*IRed); |
5232 | TaskgroupDescriptors.emplace_back(*ITD); |
5233 | std::advance(IPriv, 1); |
5234 | std::advance(IRed, 1); |
5235 | std::advance(ITD, 1); |
5236 | } |
5237 | } |
5238 | OMPPrivateScope InRedScope(CGF); |
5239 | if (!InRedVars.empty()) { |
5240 | ReductionCodeGen RedCG(InRedVars, InRedVars, InRedPrivs, InRedOps); |
5241 | for (unsigned Cnt = 0, E = InRedVars.size(); Cnt < E; ++Cnt) { |
5242 | RedCG.emitSharedOrigLValue(CGF, N: Cnt); |
5243 | RedCG.emitAggregateType(CGF, N: Cnt); |
5244 | // FIXME: This must removed once the runtime library is fixed. |
5245 | // Emit required threadprivate variables for |
5246 | // initializer/combiner/finalizer. |
5247 | CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, Loc: S.getBeginLoc(), |
5248 | RCG&: RedCG, N: Cnt); |
5249 | llvm::Value *ReductionsPtr; |
5250 | if (const Expr *TRExpr = TaskgroupDescriptors[Cnt]) { |
5251 | ReductionsPtr = |
5252 | CGF.EmitLoadOfScalar(lvalue: CGF.EmitLValue(E: TRExpr), Loc: TRExpr->getExprLoc()); |
5253 | } else { |
5254 | ReductionsPtr = llvm::ConstantPointerNull::get(T: CGF.VoidPtrTy); |
5255 | } |
5256 | Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem( |
5257 | CGF, Loc: S.getBeginLoc(), ReductionsPtr, SharedLVal: RedCG.getSharedLValue(N: Cnt)); |
5258 | Replacement = Address( |
5259 | CGF.EmitScalarConversion( |
5260 | Src: Replacement.emitRawPointer(CGF), SrcTy: CGF.getContext().VoidPtrTy, |
5261 | DstTy: CGF.getContext().getPointerType(T: InRedPrivs[Cnt]->getType()), |
5262 | Loc: InRedPrivs[Cnt]->getExprLoc()), |
5263 | CGF.ConvertTypeForMem(T: InRedPrivs[Cnt]->getType()), |
5264 | Replacement.getAlignment()); |
5265 | Replacement = RedCG.adjustPrivateAddress(CGF, N: Cnt, PrivateAddr: Replacement); |
5266 | InRedScope.addPrivate(LocalVD: RedCG.getBaseDecl(N: Cnt), Addr: Replacement); |
5267 | } |
5268 | } |
5269 | (void)InRedScope.Privatize(); |
5270 | } |
5271 | |
5272 | void CodeGenFunction::EmitOMPTaskDirective(const OMPTaskDirective &S) { |
5273 | // Emit outlined function for task construct. |
5274 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_task); |
5275 | Address CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
5276 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
5277 | const Expr *IfCond = nullptr; |
5278 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
5279 | if (C->getNameModifier() == OMPD_unknown || |
5280 | C->getNameModifier() == OMPD_task) { |
5281 | IfCond = C->getCondition(); |
5282 | break; |
5283 | } |
5284 | } |
5285 | |
5286 | OMPTaskDataTy Data; |
5287 | // Check if we should emit tied or untied task. |
5288 | Data.Tied = !S.getSingleClause<OMPUntiedClause>(); |
5289 | auto &&BodyGen = [CS](CodeGenFunction &CGF, PrePostActionTy &) { |
5290 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
5291 | }; |
5292 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
5293 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
5294 | const OMPTaskDataTy &Data) { |
5295 | CGF.CGM.getOpenMPRuntime().emitTaskCall(CGF, Loc: S.getBeginLoc(), D: S, TaskFunction: OutlinedFn, |
5296 | SharedsTy, Shareds: CapturedStruct, IfCond, |
5297 | Data); |
5298 | }; |
5299 | auto LPCRegion = |
5300 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
5301 | EmitOMPTaskBasedDirective(S, OMPD_task, BodyGen, TaskGen, Data); |
5302 | } |
5303 | |
5304 | void CodeGenFunction::EmitOMPTaskyieldDirective( |
5305 | const OMPTaskyieldDirective &S) { |
5306 | CGM.getOpenMPRuntime().emitTaskyieldCall(CGF&: *this, Loc: S.getBeginLoc()); |
5307 | } |
5308 | |
5309 | void CodeGenFunction::EmitOMPErrorDirective(const OMPErrorDirective &S) { |
5310 | const OMPMessageClause *MC = S.getSingleClause<OMPMessageClause>(); |
5311 | Expr *ME = MC ? MC->getMessageString() : nullptr; |
5312 | const OMPSeverityClause *SC = S.getSingleClause<OMPSeverityClause>(); |
5313 | bool IsFatal = false; |
5314 | if (!SC || SC->getSeverityKind() == OMPC_SEVERITY_fatal) |
5315 | IsFatal = true; |
5316 | CGM.getOpenMPRuntime().emitErrorCall(CGF&: *this, Loc: S.getBeginLoc(), ME, IsFatal); |
5317 | } |
5318 | |
5319 | void CodeGenFunction::EmitOMPBarrierDirective(const OMPBarrierDirective &S) { |
5320 | CGM.getOpenMPRuntime().emitBarrierCall(*this, S.getBeginLoc(), OMPD_barrier); |
5321 | } |
5322 | |
5323 | void CodeGenFunction::EmitOMPTaskwaitDirective(const OMPTaskwaitDirective &S) { |
5324 | OMPTaskDataTy Data; |
5325 | // Build list of dependences |
5326 | buildDependences(S, Data); |
5327 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
5328 | CGM.getOpenMPRuntime().emitTaskwaitCall(CGF&: *this, Loc: S.getBeginLoc(), Data); |
5329 | } |
5330 | |
5331 | bool isSupportedByOpenMPIRBuilder(const OMPTaskgroupDirective &T) { |
5332 | return T.clauses().empty(); |
5333 | } |
5334 | |
5335 | void CodeGenFunction::EmitOMPTaskgroupDirective( |
5336 | const OMPTaskgroupDirective &S) { |
5337 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5338 | if (CGM.getLangOpts().OpenMPIRBuilder && isSupportedByOpenMPIRBuilder(T: S)) { |
5339 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5340 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5341 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5342 | AllocaInsertPt->getIterator()); |
5343 | |
5344 | auto BodyGenCB = [&, this](InsertPointTy AllocaIP, |
5345 | InsertPointTy CodeGenIP) { |
5346 | Builder.restoreIP(IP: CodeGenIP); |
5347 | EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5348 | }; |
5349 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5350 | if (!CapturedStmtInfo) |
5351 | CapturedStmtInfo = &CapStmtInfo; |
5352 | Builder.restoreIP(IP: OMPBuilder.createTaskgroup(Loc: Builder, AllocaIP, BodyGenCB)); |
5353 | return; |
5354 | } |
5355 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
5356 | Action.Enter(CGF); |
5357 | if (const Expr *E = S.getReductionRef()) { |
5358 | SmallVector<const Expr *, 4> LHSs; |
5359 | SmallVector<const Expr *, 4> RHSs; |
5360 | OMPTaskDataTy Data; |
5361 | for (const auto *C : S.getClausesOfKind<OMPTaskReductionClause>()) { |
5362 | Data.ReductionVars.append(C->varlist_begin(), C->varlist_end()); |
5363 | Data.ReductionOrigs.append(C->varlist_begin(), C->varlist_end()); |
5364 | Data.ReductionCopies.append(C->privates().begin(), C->privates().end()); |
5365 | Data.ReductionOps.append(C->reduction_ops().begin(), |
5366 | C->reduction_ops().end()); |
5367 | LHSs.append(C->lhs_exprs().begin(), C->lhs_exprs().end()); |
5368 | RHSs.append(C->rhs_exprs().begin(), C->rhs_exprs().end()); |
5369 | } |
5370 | llvm::Value *ReductionDesc = |
5371 | CGF.CGM.getOpenMPRuntime().emitTaskReductionInit(CGF, Loc: S.getBeginLoc(), |
5372 | LHSExprs: LHSs, RHSExprs: RHSs, Data); |
5373 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
5374 | CGF.EmitVarDecl(D: *VD); |
5375 | CGF.EmitStoreOfScalar(Value: ReductionDesc, Addr: CGF.GetAddrOfLocalVar(VD), |
5376 | /*Volatile=*/false, Ty: E->getType()); |
5377 | } |
5378 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
5379 | }; |
5380 | CGM.getOpenMPRuntime().emitTaskgroupRegion(CGF&: *this, TaskgroupOpGen: CodeGen, Loc: S.getBeginLoc()); |
5381 | } |
5382 | |
5383 | void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) { |
5384 | llvm::AtomicOrdering AO = S.getSingleClause<OMPFlushClause>() |
5385 | ? llvm::AtomicOrdering::NotAtomic |
5386 | : llvm::AtomicOrdering::AcquireRelease; |
5387 | CGM.getOpenMPRuntime().emitFlush( |
5388 | CGF&: *this, |
5389 | Vars: [&S]() -> ArrayRef<const Expr *> { |
5390 | if (const auto *FlushClause = S.getSingleClause<OMPFlushClause>()) |
5391 | return llvm::ArrayRef(FlushClause->varlist_begin(), |
5392 | FlushClause->varlist_end()); |
5393 | return std::nullopt; |
5394 | }(), |
5395 | Loc: S.getBeginLoc(), AO); |
5396 | } |
5397 | |
5398 | void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) { |
5399 | const auto *DO = S.getSingleClause<OMPDepobjClause>(); |
5400 | LValue DOLVal = EmitLValue(E: DO->getDepobj()); |
5401 | if (const auto *DC = S.getSingleClause<OMPDependClause>()) { |
5402 | OMPTaskDataTy::DependData Dependencies(DC->getDependencyKind(), |
5403 | DC->getModifier()); |
5404 | Dependencies.DepExprs.append(DC->varlist_begin(), DC->varlist_end()); |
5405 | Address DepAddr = CGM.getOpenMPRuntime().emitDepobjDependClause( |
5406 | CGF&: *this, Dependencies, Loc: DC->getBeginLoc()); |
5407 | EmitStoreOfScalar(value: DepAddr.emitRawPointer(CGF&: *this), lvalue: DOLVal); |
5408 | return; |
5409 | } |
5410 | if (const auto *DC = S.getSingleClause<OMPDestroyClause>()) { |
5411 | CGM.getOpenMPRuntime().emitDestroyClause(CGF&: *this, DepobjLVal: DOLVal, Loc: DC->getBeginLoc()); |
5412 | return; |
5413 | } |
5414 | if (const auto *UC = S.getSingleClause<OMPUpdateClause>()) { |
5415 | CGM.getOpenMPRuntime().emitUpdateClause( |
5416 | CGF&: *this, DepobjLVal: DOLVal, NewDepKind: UC->getDependencyKind(), Loc: UC->getBeginLoc()); |
5417 | return; |
5418 | } |
5419 | } |
5420 | |
5421 | void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) { |
5422 | if (!OMPParentLoopDirectiveForScan) |
5423 | return; |
5424 | const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan; |
5425 | bool IsInclusive = S.hasClausesOfKind<OMPInclusiveClause>(); |
5426 | SmallVector<const Expr *, 4> Shareds; |
5427 | SmallVector<const Expr *, 4> Privates; |
5428 | SmallVector<const Expr *, 4> LHSs; |
5429 | SmallVector<const Expr *, 4> RHSs; |
5430 | SmallVector<const Expr *, 4> ReductionOps; |
5431 | SmallVector<const Expr *, 4> CopyOps; |
5432 | SmallVector<const Expr *, 4> CopyArrayTemps; |
5433 | SmallVector<const Expr *, 4> CopyArrayElems; |
5434 | for (const auto *C : ParentDir.getClausesOfKind<OMPReductionClause>()) { |
5435 | if (C->getModifier() != OMPC_REDUCTION_inscan) |
5436 | continue; |
5437 | Shareds.append(C->varlist_begin(), C->varlist_end()); |
5438 | Privates.append(in_start: C->privates().begin(), in_end: C->privates().end()); |
5439 | LHSs.append(in_start: C->lhs_exprs().begin(), in_end: C->lhs_exprs().end()); |
5440 | RHSs.append(in_start: C->rhs_exprs().begin(), in_end: C->rhs_exprs().end()); |
5441 | ReductionOps.append(in_start: C->reduction_ops().begin(), in_end: C->reduction_ops().end()); |
5442 | CopyOps.append(in_start: C->copy_ops().begin(), in_end: C->copy_ops().end()); |
5443 | CopyArrayTemps.append(in_start: C->copy_array_temps().begin(), |
5444 | in_end: C->copy_array_temps().end()); |
5445 | CopyArrayElems.append(in_start: C->copy_array_elems().begin(), |
5446 | in_end: C->copy_array_elems().end()); |
5447 | } |
5448 | if (ParentDir.getDirectiveKind() == OMPD_simd || |
5449 | (getLangOpts().OpenMPSimd && |
5450 | isOpenMPSimdDirective(ParentDir.getDirectiveKind()))) { |
5451 | // For simd directive and simd-based directives in simd only mode, use the |
5452 | // following codegen: |
5453 | // int x = 0; |
5454 | // #pragma omp simd reduction(inscan, +: x) |
5455 | // for (..) { |
5456 | // <first part> |
5457 | // #pragma omp scan inclusive(x) |
5458 | // <second part> |
5459 | // } |
5460 | // is transformed to: |
5461 | // int x = 0; |
5462 | // for (..) { |
5463 | // int x_priv = 0; |
5464 | // <first part> |
5465 | // x = x_priv + x; |
5466 | // x_priv = x; |
5467 | // <second part> |
5468 | // } |
5469 | // and |
5470 | // int x = 0; |
5471 | // #pragma omp simd reduction(inscan, +: x) |
5472 | // for (..) { |
5473 | // <first part> |
5474 | // #pragma omp scan exclusive(x) |
5475 | // <second part> |
5476 | // } |
5477 | // to |
5478 | // int x = 0; |
5479 | // for (..) { |
5480 | // int x_priv = 0; |
5481 | // <second part> |
5482 | // int temp = x; |
5483 | // x = x_priv + x; |
5484 | // x_priv = temp; |
5485 | // <first part> |
5486 | // } |
5487 | llvm::BasicBlock *OMPScanReduce = createBasicBlock(name: "omp.inscan.reduce" ); |
5488 | EmitBranch(Block: IsInclusive |
5489 | ? OMPScanReduce |
5490 | : BreakContinueStack.back().ContinueBlock.getBlock()); |
5491 | EmitBlock(BB: OMPScanDispatch); |
5492 | { |
5493 | // New scope for correct construction/destruction of temp variables for |
5494 | // exclusive scan. |
5495 | LexicalScope Scope(*this, S.getSourceRange()); |
5496 | EmitBranch(Block: IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock); |
5497 | EmitBlock(BB: OMPScanReduce); |
5498 | if (!IsInclusive) { |
5499 | // Create temp var and copy LHS value to this temp value. |
5500 | // TMP = LHS; |
5501 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5502 | const Expr *PrivateExpr = Privates[I]; |
5503 | const Expr *TempExpr = CopyArrayTemps[I]; |
5504 | EmitAutoVarDecl( |
5505 | D: *cast<VarDecl>(Val: cast<DeclRefExpr>(Val: TempExpr)->getDecl())); |
5506 | LValue DestLVal = EmitLValue(E: TempExpr); |
5507 | LValue SrcLVal = EmitLValue(E: LHSs[I]); |
5508 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this), |
5509 | SrcAddr: SrcLVal.getAddress(CGF&: *this), |
5510 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5511 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5512 | Copy: CopyOps[I]); |
5513 | } |
5514 | } |
5515 | CGM.getOpenMPRuntime().emitReduction( |
5516 | *this, ParentDir.getEndLoc(), Privates, LHSs, RHSs, ReductionOps, |
5517 | {/*WithNowait=*/true, /*SimpleReduction=*/true, OMPD_simd}); |
5518 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5519 | const Expr *PrivateExpr = Privates[I]; |
5520 | LValue DestLVal; |
5521 | LValue SrcLVal; |
5522 | if (IsInclusive) { |
5523 | DestLVal = EmitLValue(E: RHSs[I]); |
5524 | SrcLVal = EmitLValue(E: LHSs[I]); |
5525 | } else { |
5526 | const Expr *TempExpr = CopyArrayTemps[I]; |
5527 | DestLVal = EmitLValue(E: RHSs[I]); |
5528 | SrcLVal = EmitLValue(E: TempExpr); |
5529 | } |
5530 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this), |
5531 | SrcAddr: SrcLVal.getAddress(CGF&: *this), |
5532 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5533 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5534 | Copy: CopyOps[I]); |
5535 | } |
5536 | } |
5537 | EmitBranch(Block: IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock); |
5538 | OMPScanExitBlock = IsInclusive |
5539 | ? BreakContinueStack.back().ContinueBlock.getBlock() |
5540 | : OMPScanReduce; |
5541 | EmitBlock(BB: OMPAfterScanBlock); |
5542 | return; |
5543 | } |
5544 | if (!IsInclusive) { |
5545 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5546 | EmitBlock(BB: OMPScanExitBlock); |
5547 | } |
5548 | if (OMPFirstScanLoop) { |
5549 | // Emit buffer[i] = red; at the end of the input phase. |
5550 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5551 | .getIterationVariable() |
5552 | ->IgnoreParenImpCasts(); |
5553 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5554 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5555 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5556 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5557 | const Expr *PrivateExpr = Privates[I]; |
5558 | const Expr *OrigExpr = Shareds[I]; |
5559 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5560 | OpaqueValueMapping IdxMapping( |
5561 | *this, |
5562 | cast<OpaqueValueExpr>( |
5563 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5564 | RValue::get(V: IdxVal)); |
5565 | LValue DestLVal = EmitLValue(E: CopyArrayElem); |
5566 | LValue SrcLVal = EmitLValue(E: OrigExpr); |
5567 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this), |
5568 | SrcAddr: SrcLVal.getAddress(CGF&: *this), |
5569 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5570 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5571 | Copy: CopyOps[I]); |
5572 | } |
5573 | } |
5574 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5575 | if (IsInclusive) { |
5576 | EmitBlock(BB: OMPScanExitBlock); |
5577 | EmitBranch(Block: BreakContinueStack.back().ContinueBlock.getBlock()); |
5578 | } |
5579 | EmitBlock(BB: OMPScanDispatch); |
5580 | if (!OMPFirstScanLoop) { |
5581 | // Emit red = buffer[i]; at the entrance to the scan phase. |
5582 | const auto *IVExpr = cast<OMPLoopDirective>(Val: ParentDir) |
5583 | .getIterationVariable() |
5584 | ->IgnoreParenImpCasts(); |
5585 | LValue IdxLVal = EmitLValue(E: IVExpr); |
5586 | llvm::Value *IdxVal = EmitLoadOfScalar(lvalue: IdxLVal, Loc: IVExpr->getExprLoc()); |
5587 | IdxVal = Builder.CreateIntCast(V: IdxVal, DestTy: SizeTy, /*isSigned=*/false); |
5588 | llvm::BasicBlock *ExclusiveExitBB = nullptr; |
5589 | if (!IsInclusive) { |
5590 | llvm::BasicBlock *ContBB = createBasicBlock(name: "omp.exclusive.dec" ); |
5591 | ExclusiveExitBB = createBasicBlock(name: "omp.exclusive.copy.exit" ); |
5592 | llvm::Value *Cmp = Builder.CreateIsNull(Arg: IdxVal); |
5593 | Builder.CreateCondBr(Cond: Cmp, True: ExclusiveExitBB, False: ContBB); |
5594 | EmitBlock(BB: ContBB); |
5595 | // Use idx - 1 iteration for exclusive scan. |
5596 | IdxVal = Builder.CreateNUWSub(LHS: IdxVal, RHS: llvm::ConstantInt::get(Ty: SizeTy, V: 1)); |
5597 | } |
5598 | for (unsigned I = 0, E = CopyArrayElems.size(); I < E; ++I) { |
5599 | const Expr *PrivateExpr = Privates[I]; |
5600 | const Expr *OrigExpr = Shareds[I]; |
5601 | const Expr *CopyArrayElem = CopyArrayElems[I]; |
5602 | OpaqueValueMapping IdxMapping( |
5603 | *this, |
5604 | cast<OpaqueValueExpr>( |
5605 | Val: cast<ArraySubscriptExpr>(Val: CopyArrayElem)->getIdx()), |
5606 | RValue::get(V: IdxVal)); |
5607 | LValue SrcLVal = EmitLValue(E: CopyArrayElem); |
5608 | LValue DestLVal = EmitLValue(E: OrigExpr); |
5609 | EmitOMPCopy(OriginalType: PrivateExpr->getType(), DestAddr: DestLVal.getAddress(CGF&: *this), |
5610 | SrcAddr: SrcLVal.getAddress(CGF&: *this), |
5611 | DestVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: LHSs[I])->getDecl()), |
5612 | SrcVD: cast<VarDecl>(Val: cast<DeclRefExpr>(Val: RHSs[I])->getDecl()), |
5613 | Copy: CopyOps[I]); |
5614 | } |
5615 | if (!IsInclusive) { |
5616 | EmitBlock(BB: ExclusiveExitBB); |
5617 | } |
5618 | } |
5619 | EmitBranch(Block: (OMPFirstScanLoop == IsInclusive) ? OMPBeforeScanBlock |
5620 | : OMPAfterScanBlock); |
5621 | EmitBlock(BB: OMPAfterScanBlock); |
5622 | } |
5623 | |
5624 | void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, |
5625 | const CodeGenLoopTy &CodeGenLoop, |
5626 | Expr *IncExpr) { |
5627 | // Emit the loop iteration variable. |
5628 | const auto *IVExpr = cast<DeclRefExpr>(Val: S.getIterationVariable()); |
5629 | const auto *IVDecl = cast<VarDecl>(Val: IVExpr->getDecl()); |
5630 | EmitVarDecl(D: *IVDecl); |
5631 | |
5632 | // Emit the iterations count variable. |
5633 | // If it is not a variable, Sema decided to calculate iterations count on each |
5634 | // iteration (e.g., it is foldable into a constant). |
5635 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
5636 | EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
5637 | // Emit calculation of the iterations count. |
5638 | EmitIgnoredExpr(E: S.getCalcLastIteration()); |
5639 | } |
5640 | |
5641 | CGOpenMPRuntime &RT = CGM.getOpenMPRuntime(); |
5642 | |
5643 | bool HasLastprivateClause = false; |
5644 | // Check pre-condition. |
5645 | { |
5646 | OMPLoopScope PreInitScope(*this, S); |
5647 | // Skip the entire loop if we don't meet the precondition. |
5648 | // If the condition constant folds and can be elided, avoid emitting the |
5649 | // whole loop. |
5650 | bool CondConstant; |
5651 | llvm::BasicBlock *ContBlock = nullptr; |
5652 | if (ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
5653 | if (!CondConstant) |
5654 | return; |
5655 | } else { |
5656 | llvm::BasicBlock *ThenBlock = createBasicBlock(name: "omp.precond.then" ); |
5657 | ContBlock = createBasicBlock(name: "omp.precond.end" ); |
5658 | emitPreCond(CGF&: *this, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
5659 | TrueCount: getProfileCount(&S)); |
5660 | EmitBlock(BB: ThenBlock); |
5661 | incrementProfileCounter(&S); |
5662 | } |
5663 | |
5664 | emitAlignedClause(*this, S); |
5665 | // Emit 'then' code. |
5666 | { |
5667 | // Emit helper vars inits. |
5668 | |
5669 | LValue LB = EmitOMPHelperVar( |
5670 | *this, cast<DeclRefExpr>( |
5671 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5672 | ? S.getCombinedLowerBoundVariable() |
5673 | : S.getLowerBoundVariable()))); |
5674 | LValue UB = EmitOMPHelperVar( |
5675 | *this, cast<DeclRefExpr>( |
5676 | (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5677 | ? S.getCombinedUpperBoundVariable() |
5678 | : S.getUpperBoundVariable()))); |
5679 | LValue ST = |
5680 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable())); |
5681 | LValue IL = |
5682 | EmitOMPHelperVar(CGF&: *this, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable())); |
5683 | |
5684 | OMPPrivateScope LoopScope(*this); |
5685 | if (EmitOMPFirstprivateClause(S, LoopScope)) { |
5686 | // Emit implicit barrier to synchronize threads and avoid data races |
5687 | // on initialization of firstprivate variables and post-update of |
5688 | // lastprivate variables. |
5689 | CGM.getOpenMPRuntime().emitBarrierCall( |
5690 | *this, S.getBeginLoc(), OMPD_unknown, /*EmitChecks=*/false, |
5691 | /*ForceSimpleCall=*/true); |
5692 | } |
5693 | EmitOMPPrivateClause(S, LoopScope); |
5694 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5695 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5696 | !isOpenMPTeamsDirective(S.getDirectiveKind())) |
5697 | EmitOMPReductionClauseInit(S, LoopScope); |
5698 | HasLastprivateClause = EmitOMPLastprivateClauseInit(S, LoopScope); |
5699 | EmitOMPPrivateLoopCounters(S, LoopScope); |
5700 | (void)LoopScope.Privatize(); |
5701 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
5702 | CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(*this, S); |
5703 | |
5704 | // Detect the distribute schedule kind and chunk. |
5705 | llvm::Value *Chunk = nullptr; |
5706 | OpenMPDistScheduleClauseKind ScheduleKind = OMPC_DIST_SCHEDULE_unknown; |
5707 | if (const auto *C = S.getSingleClause<OMPDistScheduleClause>()) { |
5708 | ScheduleKind = C->getDistScheduleKind(); |
5709 | if (const Expr *Ch = C->getChunkSize()) { |
5710 | Chunk = EmitScalarExpr(E: Ch); |
5711 | Chunk = EmitScalarConversion(Src: Chunk, SrcTy: Ch->getType(), |
5712 | DstTy: S.getIterationVariable()->getType(), |
5713 | Loc: S.getBeginLoc()); |
5714 | } |
5715 | } else { |
5716 | // Default behaviour for dist_schedule clause. |
5717 | CGM.getOpenMPRuntime().getDefaultDistScheduleAndChunk( |
5718 | CGF&: *this, S, ScheduleKind, Chunk); |
5719 | } |
5720 | const unsigned IVSize = getContext().getTypeSize(IVExpr->getType()); |
5721 | const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation(); |
5722 | |
5723 | // OpenMP [2.10.8, distribute Construct, Description] |
5724 | // If dist_schedule is specified, kind must be static. If specified, |
5725 | // iterations are divided into chunks of size chunk_size, chunks are |
5726 | // assigned to the teams of the league in a round-robin fashion in the |
5727 | // order of the team number. When no chunk_size is specified, the |
5728 | // iteration space is divided into chunks that are approximately equal |
5729 | // in size, and at most one chunk is distributed to each team of the |
5730 | // league. The size of the chunks is unspecified in this case. |
5731 | bool StaticChunked = |
5732 | RT.isStaticChunked(ScheduleKind, /* Chunked */ Chunk != nullptr) && |
5733 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); |
5734 | if (RT.isStaticNonchunked(ScheduleKind, |
5735 | /* Chunked */ Chunk != nullptr) || |
5736 | StaticChunked) { |
5737 | CGOpenMPRuntime::StaticRTInput StaticInit( |
5738 | IVSize, IVSigned, /* Ordered = */ false, IL.getAddress(CGF&: *this), |
5739 | LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this), |
5740 | StaticChunked ? Chunk : nullptr); |
5741 | RT.emitDistributeStaticInit(CGF&: *this, Loc: S.getBeginLoc(), SchedKind: ScheduleKind, |
5742 | Values: StaticInit); |
5743 | JumpDest LoopExit = |
5744 | getJumpDestInCurrentScope(Target: createBasicBlock(name: "omp.loop.exit" )); |
5745 | // UB = min(UB, GlobalUB); |
5746 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5747 | ? S.getCombinedEnsureUpperBound() |
5748 | : S.getEnsureUpperBound()); |
5749 | // IV = LB; |
5750 | EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5751 | ? S.getCombinedInit() |
5752 | : S.getInit()); |
5753 | |
5754 | const Expr *Cond = |
5755 | isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()) |
5756 | ? S.getCombinedCond() |
5757 | : S.getCond(); |
5758 | |
5759 | if (StaticChunked) |
5760 | Cond = S.getCombinedDistCond(); |
5761 | |
5762 | // For static unchunked schedules generate: |
5763 | // |
5764 | // 1. For distribute alone, codegen |
5765 | // while (idx <= UB) { |
5766 | // BODY; |
5767 | // ++idx; |
5768 | // } |
5769 | // |
5770 | // 2. When combined with 'for' (e.g. as in 'distribute parallel for') |
5771 | // while (idx <= UB) { |
5772 | // <CodeGen rest of pragma>(LB, UB); |
5773 | // idx += ST; |
5774 | // } |
5775 | // |
5776 | // For static chunk one schedule generate: |
5777 | // |
5778 | // while (IV <= GlobalUB) { |
5779 | // <CodeGen rest of pragma>(LB, UB); |
5780 | // LB += ST; |
5781 | // UB += ST; |
5782 | // UB = min(UB, GlobalUB); |
5783 | // IV = LB; |
5784 | // } |
5785 | // |
5786 | emitCommonSimdLoop( |
5787 | CGF&: *this, S, |
5788 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5789 | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
5790 | CGF.EmitOMPSimdInit(D: S); |
5791 | }, |
5792 | BodyCodeGen: [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, |
5793 | StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { |
5794 | CGF.EmitOMPInnerLoop( |
5795 | S, LoopScope.requiresCleanups(), Cond, IncExpr, |
5796 | [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) { |
5797 | CodeGenLoop(CGF, S, LoopExit); |
5798 | }, |
5799 | [&S, StaticChunked](CodeGenFunction &CGF) { |
5800 | if (StaticChunked) { |
5801 | CGF.EmitIgnoredExpr(E: S.getCombinedNextLowerBound()); |
5802 | CGF.EmitIgnoredExpr(E: S.getCombinedNextUpperBound()); |
5803 | CGF.EmitIgnoredExpr(E: S.getCombinedEnsureUpperBound()); |
5804 | CGF.EmitIgnoredExpr(E: S.getCombinedInit()); |
5805 | } |
5806 | }); |
5807 | }); |
5808 | EmitBlock(BB: LoopExit.getBlock()); |
5809 | // Tell the runtime we are done. |
5810 | RT.emitForStaticFinish(*this, S.getEndLoc(), OMPD_distribute); |
5811 | } else { |
5812 | // Emit the outer loop, which requests its work chunk [LB..UB] from |
5813 | // runtime and runs the inner loop to process it. |
5814 | const OMPLoopArguments LoopArguments = { |
5815 | LB.getAddress(CGF&: *this), UB.getAddress(CGF&: *this), ST.getAddress(CGF&: *this), |
5816 | IL.getAddress(CGF&: *this), Chunk}; |
5817 | EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArgs: LoopArguments, |
5818 | CodeGenLoopContent: CodeGenLoop); |
5819 | } |
5820 | if (isOpenMPSimdDirective(S.getDirectiveKind())) { |
5821 | EmitOMPSimdFinal(S, [IL, &S](CodeGenFunction &CGF) { |
5822 | return CGF.Builder.CreateIsNotNull( |
5823 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5824 | }); |
5825 | } |
5826 | if (isOpenMPSimdDirective(S.getDirectiveKind()) && |
5827 | !isOpenMPParallelDirective(S.getDirectiveKind()) && |
5828 | !isOpenMPTeamsDirective(S.getDirectiveKind())) { |
5829 | EmitOMPReductionClauseFinal(S, OMPD_simd); |
5830 | // Emit post-update of the reduction variables if IsLastIter != 0. |
5831 | emitPostUpdateForReductionClause( |
5832 | *this, S, [IL, &S](CodeGenFunction &CGF) { |
5833 | return CGF.Builder.CreateIsNotNull( |
5834 | CGF.EmitLoadOfScalar(IL, S.getBeginLoc())); |
5835 | }); |
5836 | } |
5837 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
5838 | if (HasLastprivateClause) { |
5839 | EmitOMPLastprivateClauseFinal( |
5840 | D: S, /*NoFinals=*/false, |
5841 | IsLastIterCond: Builder.CreateIsNotNull(Arg: EmitLoadOfScalar(IL, S.getBeginLoc()))); |
5842 | } |
5843 | } |
5844 | |
5845 | // We're now done with the loop, so jump to the continuation block. |
5846 | if (ContBlock) { |
5847 | EmitBranch(Block: ContBlock); |
5848 | EmitBlock(BB: ContBlock, IsFinished: true); |
5849 | } |
5850 | } |
5851 | } |
5852 | |
5853 | void CodeGenFunction::EmitOMPDistributeDirective( |
5854 | const OMPDistributeDirective &S) { |
5855 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
5856 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
5857 | }; |
5858 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5859 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen); |
5860 | } |
5861 | |
5862 | static llvm::Function *emitOutlinedOrderedFunction(CodeGenModule &CGM, |
5863 | const CapturedStmt *S, |
5864 | SourceLocation Loc) { |
5865 | CodeGenFunction CGF(CGM, /*suppressNewContext=*/true); |
5866 | CodeGenFunction::CGCapturedStmtInfo CapStmtInfo; |
5867 | CGF.CapturedStmtInfo = &CapStmtInfo; |
5868 | llvm::Function *Fn = CGF.GenerateOpenMPCapturedStmtFunction(S: *S, Loc); |
5869 | Fn->setDoesNotRecurse(); |
5870 | return Fn; |
5871 | } |
5872 | |
5873 | template <typename T> |
5874 | static void emitRestoreIP(CodeGenFunction &CGF, const T *C, |
5875 | llvm::OpenMPIRBuilder::InsertPointTy AllocaIP, |
5876 | llvm::OpenMPIRBuilder &OMPBuilder) { |
5877 | |
5878 | unsigned NumLoops = C->getNumLoops(); |
5879 | QualType Int64Ty = CGF.CGM.getContext().getIntTypeForBitwidth( |
5880 | /*DestWidth=*/64, /*Signed=*/1); |
5881 | llvm::SmallVector<llvm::Value *> StoreValues; |
5882 | for (unsigned I = 0; I < NumLoops; I++) { |
5883 | const Expr *CounterVal = C->getLoopData(I); |
5884 | assert(CounterVal); |
5885 | llvm::Value *StoreValue = CGF.EmitScalarConversion( |
5886 | Src: CGF.EmitScalarExpr(E: CounterVal), SrcTy: CounterVal->getType(), DstTy: Int64Ty, |
5887 | Loc: CounterVal->getExprLoc()); |
5888 | StoreValues.emplace_back(Args&: StoreValue); |
5889 | } |
5890 | OMPDoacrossKind<T> ODK; |
5891 | bool IsDependSource = ODK.isSource(C); |
5892 | CGF.Builder.restoreIP( |
5893 | IP: OMPBuilder.createOrderedDepend(Loc: CGF.Builder, AllocaIP, NumLoops, |
5894 | StoreValues, Name: ".cnt.addr" , IsDependSource)); |
5895 | } |
5896 | |
5897 | void CodeGenFunction::EmitOMPOrderedDirective(const OMPOrderedDirective &S) { |
5898 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
5899 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
5900 | using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; |
5901 | |
5902 | if (S.hasClausesOfKind<OMPDependClause>() || |
5903 | S.hasClausesOfKind<OMPDoacrossClause>()) { |
5904 | // The ordered directive with depend clause. |
5905 | assert(!S.hasAssociatedStmt() && "No associated statement must be in " |
5906 | "ordered depend|doacross construct." ); |
5907 | InsertPointTy AllocaIP(AllocaInsertPt->getParent(), |
5908 | AllocaInsertPt->getIterator()); |
5909 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5910 | emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); |
5911 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5912 | emitRestoreIP(*this, DC, AllocaIP, OMPBuilder); |
5913 | } else { |
5914 | // The ordered directive with threads or simd clause, or without clause. |
5915 | // Without clause, it behaves as if the threads clause is specified. |
5916 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5917 | |
5918 | auto FiniCB = [this](InsertPointTy IP) { |
5919 | OMPBuilderCBHelpers::FinalizeOMPRegion(CGF&: *this, IP); |
5920 | }; |
5921 | |
5922 | auto BodyGenCB = [&S, C, this](InsertPointTy AllocaIP, |
5923 | InsertPointTy CodeGenIP) { |
5924 | Builder.restoreIP(IP: CodeGenIP); |
5925 | |
5926 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5927 | if (C) { |
5928 | llvm::BasicBlock *FiniBB = splitBBWithSuffix( |
5929 | Builder, /*CreateBranch=*/false, Suffix: ".ordered.after" ); |
5930 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
5931 | GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
5932 | llvm::Function *OutlinedFn = |
5933 | emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); |
5934 | assert(S.getBeginLoc().isValid() && |
5935 | "Outlined function call location must be valid." ); |
5936 | ApplyDebugLocation::CreateDefaultArtificial(CGF&: *this, TemporaryLocation: S.getBeginLoc()); |
5937 | OMPBuilderCBHelpers::EmitCaptureStmt(CGF&: *this, CodeGenIP, FiniBB&: *FiniBB, |
5938 | Fn: OutlinedFn, Args: CapturedVars); |
5939 | } else { |
5940 | OMPBuilderCBHelpers::EmitOMPInlinedRegionBody( |
5941 | CGF&: *this, RegionBodyStmt: CS->getCapturedStmt(), AllocaIP, CodeGenIP, RegionName: "ordered" ); |
5942 | } |
5943 | }; |
5944 | |
5945 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5946 | Builder.restoreIP( |
5947 | IP: OMPBuilder.createOrderedThreadsSimd(Loc: Builder, BodyGenCB, FiniCB, IsThreads: !C)); |
5948 | } |
5949 | return; |
5950 | } |
5951 | |
5952 | if (S.hasClausesOfKind<OMPDependClause>()) { |
5953 | assert(!S.hasAssociatedStmt() && |
5954 | "No associated statement must be in ordered depend construct." ); |
5955 | for (const auto *DC : S.getClausesOfKind<OMPDependClause>()) |
5956 | CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
5957 | return; |
5958 | } |
5959 | if (S.hasClausesOfKind<OMPDoacrossClause>()) { |
5960 | assert(!S.hasAssociatedStmt() && |
5961 | "No associated statement must be in ordered doacross construct." ); |
5962 | for (const auto *DC : S.getClausesOfKind<OMPDoacrossClause>()) |
5963 | CGM.getOpenMPRuntime().emitDoacrossOrdered(*this, DC); |
5964 | return; |
5965 | } |
5966 | const auto *C = S.getSingleClause<OMPSIMDClause>(); |
5967 | auto &&CodeGen = [&S, C, this](CodeGenFunction &CGF, |
5968 | PrePostActionTy &Action) { |
5969 | const CapturedStmt *CS = S.getInnermostCapturedStmt(); |
5970 | if (C) { |
5971 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
5972 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
5973 | llvm::Function *OutlinedFn = |
5974 | emitOutlinedOrderedFunction(CGM, CS, S.getBeginLoc()); |
5975 | CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc: S.getBeginLoc(), |
5976 | OutlinedFn, Args: CapturedVars); |
5977 | } else { |
5978 | Action.Enter(CGF); |
5979 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
5980 | } |
5981 | }; |
5982 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
5983 | CGM.getOpenMPRuntime().emitOrderedRegion(CGF&: *this, OrderedOpGen: CodeGen, Loc: S.getBeginLoc(), IsThreads: !C); |
5984 | } |
5985 | |
5986 | static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val, |
5987 | QualType SrcType, QualType DestType, |
5988 | SourceLocation Loc) { |
5989 | assert(CGF.hasScalarEvaluationKind(DestType) && |
5990 | "DestType must have scalar evaluation kind." ); |
5991 | assert(!Val.isAggregate() && "Must be a scalar or complex." ); |
5992 | return Val.isScalar() ? CGF.EmitScalarConversion(Src: Val.getScalarVal(), SrcTy: SrcType, |
5993 | DstTy: DestType, Loc) |
5994 | : CGF.EmitComplexToScalarConversion( |
5995 | Src: Val.getComplexVal(), SrcTy: SrcType, DstTy: DestType, Loc); |
5996 | } |
5997 | |
5998 | static CodeGenFunction::ComplexPairTy |
5999 | convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType, |
6000 | QualType DestType, SourceLocation Loc) { |
6001 | assert(CGF.getEvaluationKind(DestType) == TEK_Complex && |
6002 | "DestType must have complex evaluation kind." ); |
6003 | CodeGenFunction::ComplexPairTy ComplexVal; |
6004 | if (Val.isScalar()) { |
6005 | // Convert the input element to the element type of the complex. |
6006 | QualType DestElementType = |
6007 | DestType->castAs<ComplexType>()->getElementType(); |
6008 | llvm::Value *ScalarVal = CGF.EmitScalarConversion( |
6009 | Src: Val.getScalarVal(), SrcTy: SrcType, DstTy: DestElementType, Loc); |
6010 | ComplexVal = CodeGenFunction::ComplexPairTy( |
6011 | ScalarVal, llvm::Constant::getNullValue(Ty: ScalarVal->getType())); |
6012 | } else { |
6013 | assert(Val.isComplex() && "Must be a scalar or complex." ); |
6014 | QualType SrcElementType = SrcType->castAs<ComplexType>()->getElementType(); |
6015 | QualType DestElementType = |
6016 | DestType->castAs<ComplexType>()->getElementType(); |
6017 | ComplexVal.first = CGF.EmitScalarConversion( |
6018 | Src: Val.getComplexVal().first, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6019 | ComplexVal.second = CGF.EmitScalarConversion( |
6020 | Src: Val.getComplexVal().second, SrcTy: SrcElementType, DstTy: DestElementType, Loc); |
6021 | } |
6022 | return ComplexVal; |
6023 | } |
6024 | |
6025 | static void emitSimpleAtomicStore(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6026 | LValue LVal, RValue RVal) { |
6027 | if (LVal.isGlobalReg()) |
6028 | CGF.EmitStoreThroughGlobalRegLValue(Src: RVal, Dst: LVal); |
6029 | else |
6030 | CGF.EmitAtomicStore(rvalue: RVal, lvalue: LVal, AO, IsVolatile: LVal.isVolatile(), /*isInit=*/false); |
6031 | } |
6032 | |
6033 | static RValue emitSimpleAtomicLoad(CodeGenFunction &CGF, |
6034 | llvm::AtomicOrdering AO, LValue LVal, |
6035 | SourceLocation Loc) { |
6036 | if (LVal.isGlobalReg()) |
6037 | return CGF.EmitLoadOfLValue(V: LVal, Loc); |
6038 | return CGF.EmitAtomicLoad( |
6039 | lvalue: LVal, loc: Loc, AO: llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(SuccessOrdering: AO), |
6040 | IsVolatile: LVal.isVolatile()); |
6041 | } |
6042 | |
6043 | void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, |
6044 | QualType RValTy, SourceLocation Loc) { |
6045 | switch (getEvaluationKind(T: LVal.getType())) { |
6046 | case TEK_Scalar: |
6047 | EmitStoreThroughLValue(Src: RValue::get(V: convertToScalarValue( |
6048 | CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc)), |
6049 | Dst: LVal); |
6050 | break; |
6051 | case TEK_Complex: |
6052 | EmitStoreOfComplex( |
6053 | V: convertToComplexValue(CGF&: *this, Val: RVal, SrcType: RValTy, DestType: LVal.getType(), Loc), dest: LVal, |
6054 | /*isInit=*/false); |
6055 | break; |
6056 | case TEK_Aggregate: |
6057 | llvm_unreachable("Must be a scalar or complex." ); |
6058 | } |
6059 | } |
6060 | |
6061 | static void emitOMPAtomicReadExpr(CodeGenFunction &CGF, llvm::AtomicOrdering AO, |
6062 | const Expr *X, const Expr *V, |
6063 | SourceLocation Loc) { |
6064 | // v = x; |
6065 | assert(V->isLValue() && "V of 'omp atomic read' is not lvalue" ); |
6066 | assert(X->isLValue() && "X of 'omp atomic read' is not lvalue" ); |
6067 | LValue XLValue = CGF.EmitLValue(E: X); |
6068 | LValue VLValue = CGF.EmitLValue(E: V); |
6069 | RValue Res = emitSimpleAtomicLoad(CGF, AO, LVal: XLValue, Loc); |
6070 | // OpenMP, 2.17.7, atomic Construct |
6071 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6072 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6073 | // operation is also an acquire flush. |
6074 | switch (AO) { |
6075 | case llvm::AtomicOrdering::Acquire: |
6076 | case llvm::AtomicOrdering::AcquireRelease: |
6077 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6078 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6079 | AO: llvm::AtomicOrdering::Acquire); |
6080 | break; |
6081 | case llvm::AtomicOrdering::Monotonic: |
6082 | case llvm::AtomicOrdering::Release: |
6083 | break; |
6084 | case llvm::AtomicOrdering::NotAtomic: |
6085 | case llvm::AtomicOrdering::Unordered: |
6086 | llvm_unreachable("Unexpected ordering." ); |
6087 | } |
6088 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: Res, RValTy: X->getType().getNonReferenceType(), Loc); |
6089 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6090 | } |
6091 | |
6092 | static void emitOMPAtomicWriteExpr(CodeGenFunction &CGF, |
6093 | llvm::AtomicOrdering AO, const Expr *X, |
6094 | const Expr *E, SourceLocation Loc) { |
6095 | // x = expr; |
6096 | assert(X->isLValue() && "X of 'omp atomic write' is not lvalue" ); |
6097 | emitSimpleAtomicStore(CGF, AO, LVal: CGF.EmitLValue(E: X), RVal: CGF.EmitAnyExpr(E)); |
6098 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6099 | // OpenMP, 2.17.7, atomic Construct |
6100 | // If the write, update, or capture clause is specified and the release, |
6101 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6102 | // the atomic operation is also a release flush. |
6103 | switch (AO) { |
6104 | case llvm::AtomicOrdering::Release: |
6105 | case llvm::AtomicOrdering::AcquireRelease: |
6106 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6107 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6108 | AO: llvm::AtomicOrdering::Release); |
6109 | break; |
6110 | case llvm::AtomicOrdering::Acquire: |
6111 | case llvm::AtomicOrdering::Monotonic: |
6112 | break; |
6113 | case llvm::AtomicOrdering::NotAtomic: |
6114 | case llvm::AtomicOrdering::Unordered: |
6115 | llvm_unreachable("Unexpected ordering." ); |
6116 | } |
6117 | } |
6118 | |
6119 | static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X, |
6120 | RValue Update, |
6121 | BinaryOperatorKind BO, |
6122 | llvm::AtomicOrdering AO, |
6123 | bool IsXLHSInRHSPart) { |
6124 | ASTContext &Context = CGF.getContext(); |
6125 | // Allow atomicrmw only if 'x' and 'update' are integer values, lvalue for 'x' |
6126 | // expression is simple and atomic is allowed for the given type for the |
6127 | // target platform. |
6128 | if (BO == BO_Comma || !Update.isScalar() || !X.isSimple() || |
6129 | (!isa<llvm::ConstantInt>(Val: Update.getScalarVal()) && |
6130 | (Update.getScalarVal()->getType() != |
6131 | X.getAddress(CGF).getElementType())) || |
6132 | !Context.getTargetInfo().hasBuiltinAtomic( |
6133 | AtomicSizeInBits: Context.getTypeSize(T: X.getType()), AlignmentInBits: Context.toBits(CharSize: X.getAlignment()))) |
6134 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6135 | |
6136 | auto &&CheckAtomicSupport = [&CGF](llvm::Type *T, BinaryOperatorKind BO) { |
6137 | if (T->isIntegerTy()) |
6138 | return true; |
6139 | |
6140 | if (T->isFloatingPointTy() && (BO == BO_Add || BO == BO_Sub)) |
6141 | return llvm::isPowerOf2_64(Value: CGF.CGM.getDataLayout().getTypeStoreSize(Ty: T)); |
6142 | |
6143 | return false; |
6144 | }; |
6145 | |
6146 | if (!CheckAtomicSupport(Update.getScalarVal()->getType(), BO) || |
6147 | !CheckAtomicSupport(X.getAddress(CGF).getElementType(), BO)) |
6148 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6149 | |
6150 | bool IsInteger = X.getAddress(CGF).getElementType()->isIntegerTy(); |
6151 | llvm::AtomicRMWInst::BinOp RMWOp; |
6152 | switch (BO) { |
6153 | case BO_Add: |
6154 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Add : llvm::AtomicRMWInst::FAdd; |
6155 | break; |
6156 | case BO_Sub: |
6157 | if (!IsXLHSInRHSPart) |
6158 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6159 | RMWOp = IsInteger ? llvm::AtomicRMWInst::Sub : llvm::AtomicRMWInst::FSub; |
6160 | break; |
6161 | case BO_And: |
6162 | RMWOp = llvm::AtomicRMWInst::And; |
6163 | break; |
6164 | case BO_Or: |
6165 | RMWOp = llvm::AtomicRMWInst::Or; |
6166 | break; |
6167 | case BO_Xor: |
6168 | RMWOp = llvm::AtomicRMWInst::Xor; |
6169 | break; |
6170 | case BO_LT: |
6171 | if (IsInteger) |
6172 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6173 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Min |
6174 | : llvm::AtomicRMWInst::Max) |
6175 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMin |
6176 | : llvm::AtomicRMWInst::UMax); |
6177 | else |
6178 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMin |
6179 | : llvm::AtomicRMWInst::FMax; |
6180 | break; |
6181 | case BO_GT: |
6182 | if (IsInteger) |
6183 | RMWOp = X.getType()->hasSignedIntegerRepresentation() |
6184 | ? (IsXLHSInRHSPart ? llvm::AtomicRMWInst::Max |
6185 | : llvm::AtomicRMWInst::Min) |
6186 | : (IsXLHSInRHSPart ? llvm::AtomicRMWInst::UMax |
6187 | : llvm::AtomicRMWInst::UMin); |
6188 | else |
6189 | RMWOp = IsXLHSInRHSPart ? llvm::AtomicRMWInst::FMax |
6190 | : llvm::AtomicRMWInst::FMin; |
6191 | break; |
6192 | case BO_Assign: |
6193 | RMWOp = llvm::AtomicRMWInst::Xchg; |
6194 | break; |
6195 | case BO_Mul: |
6196 | case BO_Div: |
6197 | case BO_Rem: |
6198 | case BO_Shl: |
6199 | case BO_Shr: |
6200 | case BO_LAnd: |
6201 | case BO_LOr: |
6202 | return std::make_pair(x: false, y: RValue::get(V: nullptr)); |
6203 | case BO_PtrMemD: |
6204 | case BO_PtrMemI: |
6205 | case BO_LE: |
6206 | case BO_GE: |
6207 | case BO_EQ: |
6208 | case BO_NE: |
6209 | case BO_Cmp: |
6210 | case BO_AddAssign: |
6211 | case BO_SubAssign: |
6212 | case BO_AndAssign: |
6213 | case BO_OrAssign: |
6214 | case BO_XorAssign: |
6215 | case BO_MulAssign: |
6216 | case BO_DivAssign: |
6217 | case BO_RemAssign: |
6218 | case BO_ShlAssign: |
6219 | case BO_ShrAssign: |
6220 | case BO_Comma: |
6221 | llvm_unreachable("Unsupported atomic update operation" ); |
6222 | } |
6223 | llvm::Value *UpdateVal = Update.getScalarVal(); |
6224 | if (auto *IC = dyn_cast<llvm::ConstantInt>(Val: UpdateVal)) { |
6225 | if (IsInteger) |
6226 | UpdateVal = CGF.Builder.CreateIntCast( |
6227 | V: IC, DestTy: X.getAddress(CGF).getElementType(), |
6228 | isSigned: X.getType()->hasSignedIntegerRepresentation()); |
6229 | else |
6230 | UpdateVal = CGF.Builder.CreateCast(Op: llvm::Instruction::CastOps::UIToFP, V: IC, |
6231 | DestTy: X.getAddress(CGF).getElementType()); |
6232 | } |
6233 | llvm::Value *Res = |
6234 | CGF.Builder.CreateAtomicRMW(Op: RMWOp, Addr: X.getAddress(CGF), Val: UpdateVal, Ordering: AO); |
6235 | return std::make_pair(x: true, y: RValue::get(V: Res)); |
6236 | } |
6237 | |
6238 | std::pair<bool, RValue> CodeGenFunction::EmitOMPAtomicSimpleUpdateExpr( |
6239 | LValue X, RValue E, BinaryOperatorKind BO, bool IsXLHSInRHSPart, |
6240 | llvm::AtomicOrdering AO, SourceLocation Loc, |
6241 | const llvm::function_ref<RValue(RValue)> CommonGen) { |
6242 | // Update expressions are allowed to have the following forms: |
6243 | // x binop= expr; -> xrval + expr; |
6244 | // x++, ++x -> xrval + 1; |
6245 | // x--, --x -> xrval - 1; |
6246 | // x = x binop expr; -> xrval binop expr |
6247 | // x = expr Op x; - > expr binop xrval; |
6248 | auto Res = emitOMPAtomicRMW(CGF&: *this, X, Update: E, BO, AO, IsXLHSInRHSPart); |
6249 | if (!Res.first) { |
6250 | if (X.isGlobalReg()) { |
6251 | // Emit an update expression: 'xrval' binop 'expr' or 'expr' binop |
6252 | // 'xrval'. |
6253 | EmitStoreThroughLValue(Src: CommonGen(EmitLoadOfLValue(V: X, Loc)), Dst: X); |
6254 | } else { |
6255 | // Perform compare-and-swap procedure. |
6256 | EmitAtomicUpdate(LVal: X, AO, UpdateOp: CommonGen, IsVolatile: X.getType().isVolatileQualified()); |
6257 | } |
6258 | } |
6259 | return Res; |
6260 | } |
6261 | |
6262 | static void emitOMPAtomicUpdateExpr(CodeGenFunction &CGF, |
6263 | llvm::AtomicOrdering AO, const Expr *X, |
6264 | const Expr *E, const Expr *UE, |
6265 | bool IsXLHSInRHSPart, SourceLocation Loc) { |
6266 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6267 | "Update expr in 'atomic update' must be a binary operator." ); |
6268 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6269 | // Update expressions are allowed to have the following forms: |
6270 | // x binop= expr; -> xrval + expr; |
6271 | // x++, ++x -> xrval + 1; |
6272 | // x--, --x -> xrval - 1; |
6273 | // x = x binop expr; -> xrval binop expr |
6274 | // x = expr Op x; - > expr binop xrval; |
6275 | assert(X->isLValue() && "X of 'omp atomic update' is not lvalue" ); |
6276 | LValue XLValue = CGF.EmitLValue(E: X); |
6277 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6278 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6279 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6280 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6281 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6282 | auto &&Gen = [&CGF, UE, ExprRValue, XRValExpr, ERValExpr](RValue XRValue) { |
6283 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6284 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6285 | return CGF.EmitAnyExpr(E: UE); |
6286 | }; |
6287 | (void)CGF.EmitOMPAtomicSimpleUpdateExpr( |
6288 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6289 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6290 | // OpenMP, 2.17.7, atomic Construct |
6291 | // If the write, update, or capture clause is specified and the release, |
6292 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6293 | // the atomic operation is also a release flush. |
6294 | switch (AO) { |
6295 | case llvm::AtomicOrdering::Release: |
6296 | case llvm::AtomicOrdering::AcquireRelease: |
6297 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6298 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6299 | AO: llvm::AtomicOrdering::Release); |
6300 | break; |
6301 | case llvm::AtomicOrdering::Acquire: |
6302 | case llvm::AtomicOrdering::Monotonic: |
6303 | break; |
6304 | case llvm::AtomicOrdering::NotAtomic: |
6305 | case llvm::AtomicOrdering::Unordered: |
6306 | llvm_unreachable("Unexpected ordering." ); |
6307 | } |
6308 | } |
6309 | |
6310 | static RValue convertToType(CodeGenFunction &CGF, RValue Value, |
6311 | QualType SourceType, QualType ResType, |
6312 | SourceLocation Loc) { |
6313 | switch (CGF.getEvaluationKind(T: ResType)) { |
6314 | case TEK_Scalar: |
6315 | return RValue::get( |
6316 | V: convertToScalarValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc)); |
6317 | case TEK_Complex: { |
6318 | auto Res = convertToComplexValue(CGF, Val: Value, SrcType: SourceType, DestType: ResType, Loc); |
6319 | return RValue::getComplex(V1: Res.first, V2: Res.second); |
6320 | } |
6321 | case TEK_Aggregate: |
6322 | break; |
6323 | } |
6324 | llvm_unreachable("Must be a scalar or complex." ); |
6325 | } |
6326 | |
6327 | static void emitOMPAtomicCaptureExpr(CodeGenFunction &CGF, |
6328 | llvm::AtomicOrdering AO, |
6329 | bool IsPostfixUpdate, const Expr *V, |
6330 | const Expr *X, const Expr *E, |
6331 | const Expr *UE, bool IsXLHSInRHSPart, |
6332 | SourceLocation Loc) { |
6333 | assert(X->isLValue() && "X of 'omp atomic capture' is not lvalue" ); |
6334 | assert(V->isLValue() && "V of 'omp atomic capture' is not lvalue" ); |
6335 | RValue NewVVal; |
6336 | LValue VLValue = CGF.EmitLValue(E: V); |
6337 | LValue XLValue = CGF.EmitLValue(E: X); |
6338 | RValue ExprRValue = CGF.EmitAnyExpr(E); |
6339 | QualType NewVValType; |
6340 | if (UE) { |
6341 | // 'x' is updated with some additional value. |
6342 | assert(isa<BinaryOperator>(UE->IgnoreImpCasts()) && |
6343 | "Update expr in 'atomic capture' must be a binary operator." ); |
6344 | const auto *BOUE = cast<BinaryOperator>(Val: UE->IgnoreImpCasts()); |
6345 | // Update expressions are allowed to have the following forms: |
6346 | // x binop= expr; -> xrval + expr; |
6347 | // x++, ++x -> xrval + 1; |
6348 | // x--, --x -> xrval - 1; |
6349 | // x = x binop expr; -> xrval binop expr |
6350 | // x = expr Op x; - > expr binop xrval; |
6351 | const auto *LHS = cast<OpaqueValueExpr>(Val: BOUE->getLHS()->IgnoreImpCasts()); |
6352 | const auto *RHS = cast<OpaqueValueExpr>(Val: BOUE->getRHS()->IgnoreImpCasts()); |
6353 | const OpaqueValueExpr *XRValExpr = IsXLHSInRHSPart ? LHS : RHS; |
6354 | NewVValType = XRValExpr->getType(); |
6355 | const OpaqueValueExpr *ERValExpr = IsXLHSInRHSPart ? RHS : LHS; |
6356 | auto &&Gen = [&CGF, &NewVVal, UE, ExprRValue, XRValExpr, ERValExpr, |
6357 | IsPostfixUpdate](RValue XRValue) { |
6358 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6359 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, XRValue); |
6360 | RValue Res = CGF.EmitAnyExpr(E: UE); |
6361 | NewVVal = IsPostfixUpdate ? XRValue : Res; |
6362 | return Res; |
6363 | }; |
6364 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6365 | X: XLValue, E: ExprRValue, BO: BOUE->getOpcode(), IsXLHSInRHSPart, AO, Loc, CommonGen: Gen); |
6366 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6367 | if (Res.first) { |
6368 | // 'atomicrmw' instruction was generated. |
6369 | if (IsPostfixUpdate) { |
6370 | // Use old value from 'atomicrmw'. |
6371 | NewVVal = Res.second; |
6372 | } else { |
6373 | // 'atomicrmw' does not provide new value, so evaluate it using old |
6374 | // value of 'x'. |
6375 | CodeGenFunction::OpaqueValueMapping MapExpr(CGF, ERValExpr, ExprRValue); |
6376 | CodeGenFunction::OpaqueValueMapping MapX(CGF, XRValExpr, Res.second); |
6377 | NewVVal = CGF.EmitAnyExpr(E: UE); |
6378 | } |
6379 | } |
6380 | } else { |
6381 | // 'x' is simply rewritten with some 'expr'. |
6382 | NewVValType = X->getType().getNonReferenceType(); |
6383 | ExprRValue = convertToType(CGF, Value: ExprRValue, SourceType: E->getType(), |
6384 | ResType: X->getType().getNonReferenceType(), Loc); |
6385 | auto &&Gen = [&NewVVal, ExprRValue](RValue XRValue) { |
6386 | NewVVal = XRValue; |
6387 | return ExprRValue; |
6388 | }; |
6389 | // Try to perform atomicrmw xchg, otherwise simple exchange. |
6390 | auto Res = CGF.EmitOMPAtomicSimpleUpdateExpr( |
6391 | X: XLValue, E: ExprRValue, /*BO=*/BO_Assign, /*IsXLHSInRHSPart=*/false, AO, |
6392 | Loc, CommonGen: Gen); |
6393 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: X); |
6394 | if (Res.first) { |
6395 | // 'atomicrmw' instruction was generated. |
6396 | NewVVal = IsPostfixUpdate ? Res.second : ExprRValue; |
6397 | } |
6398 | } |
6399 | // Emit post-update store to 'v' of old/new 'x' value. |
6400 | CGF.emitOMPSimpleStore(LVal: VLValue, RVal: NewVVal, RValTy: NewVValType, Loc); |
6401 | CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF, LHS: V); |
6402 | // OpenMP 5.1 removes the required flush for capture clause. |
6403 | if (CGF.CGM.getLangOpts().OpenMP < 51) { |
6404 | // OpenMP, 2.17.7, atomic Construct |
6405 | // If the write, update, or capture clause is specified and the release, |
6406 | // acq_rel, or seq_cst clause is specified then the strong flush on entry to |
6407 | // the atomic operation is also a release flush. |
6408 | // If the read or capture clause is specified and the acquire, acq_rel, or |
6409 | // seq_cst clause is specified then the strong flush on exit from the atomic |
6410 | // operation is also an acquire flush. |
6411 | switch (AO) { |
6412 | case llvm::AtomicOrdering::Release: |
6413 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6414 | AO: llvm::AtomicOrdering::Release); |
6415 | break; |
6416 | case llvm::AtomicOrdering::Acquire: |
6417 | CGF.CGM.getOpenMPRuntime().emitFlush(CGF, Vars: std::nullopt, Loc, |
6418 | AO: llvm::AtomicOrdering::Acquire); |
6419 | break; |
6420 | case llvm::AtomicOrdering::AcquireRelease: |
6421 | case llvm::AtomicOrdering::SequentiallyConsistent: |
6422 | CGF.CGM.getOpenMPRuntime().emitFlush( |
6423 | CGF, Vars: std::nullopt, Loc, AO: llvm::AtomicOrdering::AcquireRelease); |
6424 | break; |
6425 | case llvm::AtomicOrdering::Monotonic: |
6426 | break; |
6427 | case llvm::AtomicOrdering::NotAtomic: |
6428 | case llvm::AtomicOrdering::Unordered: |
6429 | llvm_unreachable("Unexpected ordering." ); |
6430 | } |
6431 | } |
6432 | } |
6433 | |
6434 | static void emitOMPAtomicCompareExpr( |
6435 | CodeGenFunction &CGF, llvm::AtomicOrdering AO, llvm::AtomicOrdering FailAO, |
6436 | const Expr *X, const Expr *V, const Expr *R, const Expr *E, const Expr *D, |
6437 | const Expr *CE, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly, |
6438 | SourceLocation Loc) { |
6439 | llvm::OpenMPIRBuilder &OMPBuilder = |
6440 | CGF.CGM.getOpenMPRuntime().getOMPBuilder(); |
6441 | |
6442 | OMPAtomicCompareOp Op; |
6443 | assert(isa<BinaryOperator>(CE) && "CE is not a BinaryOperator" ); |
6444 | switch (cast<BinaryOperator>(Val: CE)->getOpcode()) { |
6445 | case BO_EQ: |
6446 | Op = OMPAtomicCompareOp::EQ; |
6447 | break; |
6448 | case BO_LT: |
6449 | Op = OMPAtomicCompareOp::MIN; |
6450 | break; |
6451 | case BO_GT: |
6452 | Op = OMPAtomicCompareOp::MAX; |
6453 | break; |
6454 | default: |
6455 | llvm_unreachable("unsupported atomic compare binary operator" ); |
6456 | } |
6457 | |
6458 | LValue XLVal = CGF.EmitLValue(E: X); |
6459 | Address XAddr = XLVal.getAddress(CGF); |
6460 | |
6461 | auto EmitRValueWithCastIfNeeded = [&CGF, Loc](const Expr *X, const Expr *E) { |
6462 | if (X->getType() == E->getType()) |
6463 | return CGF.EmitScalarExpr(E); |
6464 | const Expr *NewE = E->IgnoreImplicitAsWritten(); |
6465 | llvm::Value *V = CGF.EmitScalarExpr(E: NewE); |
6466 | if (NewE->getType() == X->getType()) |
6467 | return V; |
6468 | return CGF.EmitScalarConversion(Src: V, SrcTy: NewE->getType(), DstTy: X->getType(), Loc); |
6469 | }; |
6470 | |
6471 | llvm::Value *EVal = EmitRValueWithCastIfNeeded(X, E); |
6472 | llvm::Value *DVal = D ? EmitRValueWithCastIfNeeded(X, D) : nullptr; |
6473 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: EVal)) |
6474 | EVal = CGF.Builder.CreateIntCast( |
6475 | V: CI, DestTy: XLVal.getAddress(CGF).getElementType(), |
6476 | isSigned: E->getType()->hasSignedIntegerRepresentation()); |
6477 | if (DVal) |
6478 | if (auto *CI = dyn_cast<llvm::ConstantInt>(Val: DVal)) |
6479 | DVal = CGF.Builder.CreateIntCast( |
6480 | V: CI, DestTy: XLVal.getAddress(CGF).getElementType(), |
6481 | isSigned: D->getType()->hasSignedIntegerRepresentation()); |
6482 | |
6483 | llvm::OpenMPIRBuilder::AtomicOpValue XOpVal{ |
6484 | .Var: XAddr.emitRawPointer(CGF), .ElemTy: XAddr.getElementType(), |
6485 | .IsSigned: X->getType()->hasSignedIntegerRepresentation(), |
6486 | .IsVolatile: X->getType().isVolatileQualified()}; |
6487 | llvm::OpenMPIRBuilder::AtomicOpValue VOpVal, ROpVal; |
6488 | if (V) { |
6489 | LValue LV = CGF.EmitLValue(E: V); |
6490 | Address Addr = LV.getAddress(CGF); |
6491 | VOpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6492 | .IsSigned: V->getType()->hasSignedIntegerRepresentation(), |
6493 | .IsVolatile: V->getType().isVolatileQualified()}; |
6494 | } |
6495 | if (R) { |
6496 | LValue LV = CGF.EmitLValue(E: R); |
6497 | Address Addr = LV.getAddress(CGF); |
6498 | ROpVal = {.Var: Addr.emitRawPointer(CGF), .ElemTy: Addr.getElementType(), |
6499 | .IsSigned: R->getType()->hasSignedIntegerRepresentation(), |
6500 | .IsVolatile: R->getType().isVolatileQualified()}; |
6501 | } |
6502 | |
6503 | if (FailAO == llvm::AtomicOrdering::NotAtomic) { |
6504 | // fail clause was not mentionend on the |
6505 | // "#pragma omp atomic compare" construct. |
6506 | CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare( |
6507 | Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr, |
6508 | IsPostfixUpdate, IsFailOnly)); |
6509 | } else |
6510 | CGF.Builder.restoreIP(IP: OMPBuilder.createAtomicCompare( |
6511 | Loc: CGF.Builder, X&: XOpVal, V&: VOpVal, R&: ROpVal, E: EVal, D: DVal, AO, Op, IsXBinopExpr, |
6512 | IsPostfixUpdate, IsFailOnly, Failure: FailAO)); |
6513 | } |
6514 | |
6515 | static void emitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind, |
6516 | llvm::AtomicOrdering AO, |
6517 | llvm::AtomicOrdering FailAO, bool IsPostfixUpdate, |
6518 | const Expr *X, const Expr *V, const Expr *R, |
6519 | const Expr *E, const Expr *UE, const Expr *D, |
6520 | const Expr *CE, bool IsXLHSInRHSPart, |
6521 | bool IsFailOnly, SourceLocation Loc) { |
6522 | switch (Kind) { |
6523 | case OMPC_read: |
6524 | emitOMPAtomicReadExpr(CGF, AO, X, V, Loc); |
6525 | break; |
6526 | case OMPC_write: |
6527 | emitOMPAtomicWriteExpr(CGF, AO, X, E, Loc); |
6528 | break; |
6529 | case OMPC_unknown: |
6530 | case OMPC_update: |
6531 | emitOMPAtomicUpdateExpr(CGF, AO, X, E, UE, IsXLHSInRHSPart, Loc); |
6532 | break; |
6533 | case OMPC_capture: |
6534 | emitOMPAtomicCaptureExpr(CGF, AO, IsPostfixUpdate, V, X, E, UE, |
6535 | IsXLHSInRHSPart, Loc); |
6536 | break; |
6537 | case OMPC_compare: { |
6538 | emitOMPAtomicCompareExpr(CGF, AO, FailAO, X, V, R, E, D, CE, |
6539 | IsXBinopExpr: IsXLHSInRHSPart, IsPostfixUpdate, IsFailOnly, Loc); |
6540 | break; |
6541 | } |
6542 | default: |
6543 | llvm_unreachable("Clause is not allowed in 'omp atomic'." ); |
6544 | } |
6545 | } |
6546 | |
6547 | void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) { |
6548 | llvm::AtomicOrdering AO = llvm::AtomicOrdering::Monotonic; |
6549 | // Fail Memory Clause Ordering. |
6550 | llvm::AtomicOrdering FailAO = llvm::AtomicOrdering::NotAtomic; |
6551 | bool MemOrderingSpecified = false; |
6552 | if (S.getSingleClause<OMPSeqCstClause>()) { |
6553 | AO = llvm::AtomicOrdering::SequentiallyConsistent; |
6554 | MemOrderingSpecified = true; |
6555 | } else if (S.getSingleClause<OMPAcqRelClause>()) { |
6556 | AO = llvm::AtomicOrdering::AcquireRelease; |
6557 | MemOrderingSpecified = true; |
6558 | } else if (S.getSingleClause<OMPAcquireClause>()) { |
6559 | AO = llvm::AtomicOrdering::Acquire; |
6560 | MemOrderingSpecified = true; |
6561 | } else if (S.getSingleClause<OMPReleaseClause>()) { |
6562 | AO = llvm::AtomicOrdering::Release; |
6563 | MemOrderingSpecified = true; |
6564 | } else if (S.getSingleClause<OMPRelaxedClause>()) { |
6565 | AO = llvm::AtomicOrdering::Monotonic; |
6566 | MemOrderingSpecified = true; |
6567 | } |
6568 | llvm::SmallSet<OpenMPClauseKind, 2> KindsEncountered; |
6569 | OpenMPClauseKind Kind = OMPC_unknown; |
6570 | for (const OMPClause *C : S.clauses()) { |
6571 | // Find first clause (skip seq_cst|acq_rel|aqcuire|release|relaxed clause, |
6572 | // if it is first). |
6573 | OpenMPClauseKind K = C->getClauseKind(); |
6574 | // TBD |
6575 | if (K == OMPC_weak) |
6576 | return; |
6577 | if (K == OMPC_seq_cst || K == OMPC_acq_rel || K == OMPC_acquire || |
6578 | K == OMPC_release || K == OMPC_relaxed || K == OMPC_hint) |
6579 | continue; |
6580 | Kind = K; |
6581 | KindsEncountered.insert(K); |
6582 | } |
6583 | // We just need to correct Kind here. No need to set a bool saying it is |
6584 | // actually compare capture because we can tell from whether V and R are |
6585 | // nullptr. |
6586 | if (KindsEncountered.contains(OMPC_compare) && |
6587 | KindsEncountered.contains(OMPC_capture)) |
6588 | Kind = OMPC_compare; |
6589 | if (!MemOrderingSpecified) { |
6590 | llvm::AtomicOrdering DefaultOrder = |
6591 | CGM.getOpenMPRuntime().getDefaultMemoryOrdering(); |
6592 | if (DefaultOrder == llvm::AtomicOrdering::Monotonic || |
6593 | DefaultOrder == llvm::AtomicOrdering::SequentiallyConsistent || |
6594 | (DefaultOrder == llvm::AtomicOrdering::AcquireRelease && |
6595 | Kind == OMPC_capture)) { |
6596 | AO = DefaultOrder; |
6597 | } else if (DefaultOrder == llvm::AtomicOrdering::AcquireRelease) { |
6598 | if (Kind == OMPC_unknown || Kind == OMPC_update || Kind == OMPC_write) { |
6599 | AO = llvm::AtomicOrdering::Release; |
6600 | } else if (Kind == OMPC_read) { |
6601 | assert(Kind == OMPC_read && "Unexpected atomic kind." ); |
6602 | AO = llvm::AtomicOrdering::Acquire; |
6603 | } |
6604 | } |
6605 | } |
6606 | |
6607 | if (KindsEncountered.contains(OMPC_compare) && |
6608 | KindsEncountered.contains(OMPC_fail)) { |
6609 | Kind = OMPC_compare; |
6610 | const auto *FailClause = S.getSingleClause<OMPFailClause>(); |
6611 | if (FailClause) { |
6612 | OpenMPClauseKind FailParameter = FailClause->getFailParameter(); |
6613 | if (FailParameter == llvm::omp::OMPC_relaxed) |
6614 | FailAO = llvm::AtomicOrdering::Monotonic; |
6615 | else if (FailParameter == llvm::omp::OMPC_acquire) |
6616 | FailAO = llvm::AtomicOrdering::Acquire; |
6617 | else if (FailParameter == llvm::omp::OMPC_seq_cst) |
6618 | FailAO = llvm::AtomicOrdering::SequentiallyConsistent; |
6619 | } |
6620 | } |
6621 | |
6622 | LexicalScope Scope(*this, S.getSourceRange()); |
6623 | EmitStopPoint(S: S.getAssociatedStmt()); |
6624 | emitOMPAtomicExpr(*this, Kind, AO, FailAO, S.isPostfixUpdate(), S.getX(), |
6625 | S.getV(), S.getR(), S.getExpr(), S.getUpdateExpr(), |
6626 | S.getD(), S.getCondExpr(), S.isXLHSInRHSPart(), |
6627 | S.isFailOnly(), S.getBeginLoc()); |
6628 | } |
6629 | |
6630 | static void emitCommonOMPTargetDirective(CodeGenFunction &CGF, |
6631 | const OMPExecutableDirective &S, |
6632 | const RegionCodeGenTy &CodeGen) { |
6633 | assert(isOpenMPTargetExecutionDirective(S.getDirectiveKind())); |
6634 | CodeGenModule &CGM = CGF.CGM; |
6635 | |
6636 | // On device emit this construct as inlined code. |
6637 | if (CGM.getLangOpts().OpenMPIsTargetDevice) { |
6638 | OMPLexicalScope Scope(CGF, S, OMPD_target); |
6639 | CGM.getOpenMPRuntime().emitInlinedDirective( |
6640 | CGF, OMPD_target, [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6641 | CGF.EmitStmt(S.getInnermostCapturedStmt()->getCapturedStmt()); |
6642 | }); |
6643 | return; |
6644 | } |
6645 | |
6646 | auto LPCRegion = CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF, S); |
6647 | llvm::Function *Fn = nullptr; |
6648 | llvm::Constant *FnID = nullptr; |
6649 | |
6650 | const Expr *IfCond = nullptr; |
6651 | // Check for the at most one if clause associated with the target region. |
6652 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
6653 | if (C->getNameModifier() == OMPD_unknown || |
6654 | C->getNameModifier() == OMPD_target) { |
6655 | IfCond = C->getCondition(); |
6656 | break; |
6657 | } |
6658 | } |
6659 | |
6660 | // Check if we have any device clause associated with the directive. |
6661 | llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device( |
6662 | nullptr, OMPC_DEVICE_unknown); |
6663 | if (auto *C = S.getSingleClause<OMPDeviceClause>()) |
6664 | Device.setPointerAndInt(PtrVal: C->getDevice(), IntVal: C->getModifier()); |
6665 | |
6666 | // Check if we have an if clause whose conditional always evaluates to false |
6667 | // or if we do not have any targets specified. If so the target region is not |
6668 | // an offload entry point. |
6669 | bool IsOffloadEntry = true; |
6670 | if (IfCond) { |
6671 | bool Val; |
6672 | if (CGF.ConstantFoldsToSimpleInteger(Cond: IfCond, Result&: Val) && !Val) |
6673 | IsOffloadEntry = false; |
6674 | } |
6675 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
6676 | IsOffloadEntry = false; |
6677 | |
6678 | if (CGM.getLangOpts().OpenMPOffloadMandatory && !IsOffloadEntry) { |
6679 | unsigned DiagID = CGM.getDiags().getCustomDiagID( |
6680 | L: DiagnosticsEngine::Error, |
6681 | FormatString: "No offloading entry generated while offloading is mandatory." ); |
6682 | CGM.getDiags().Report(DiagID); |
6683 | } |
6684 | |
6685 | assert(CGF.CurFuncDecl && "No parent declaration for target region!" ); |
6686 | StringRef ParentName; |
6687 | // In case we have Ctors/Dtors we use the complete type variant to produce |
6688 | // the mangling of the device outlined kernel. |
6689 | if (const auto *D = dyn_cast<CXXConstructorDecl>(Val: CGF.CurFuncDecl)) |
6690 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Ctor_Complete)); |
6691 | else if (const auto *D = dyn_cast<CXXDestructorDecl>(Val: CGF.CurFuncDecl)) |
6692 | ParentName = CGM.getMangledName(GD: GlobalDecl(D, Dtor_Complete)); |
6693 | else |
6694 | ParentName = |
6695 | CGM.getMangledName(GD: GlobalDecl(cast<FunctionDecl>(Val: CGF.CurFuncDecl))); |
6696 | |
6697 | // Emit target region as a standalone region. |
6698 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction(D: S, ParentName, OutlinedFn&: Fn, OutlinedFnID&: FnID, |
6699 | IsOffloadEntry, CodeGen); |
6700 | OMPLexicalScope Scope(CGF, S, OMPD_task); |
6701 | auto &&SizeEmitter = |
6702 | [IsOffloadEntry](CodeGenFunction &CGF, |
6703 | const OMPLoopDirective &D) -> llvm::Value * { |
6704 | if (IsOffloadEntry) { |
6705 | OMPLoopScope(CGF, D); |
6706 | // Emit calculation of the iterations count. |
6707 | llvm::Value *NumIterations = CGF.EmitScalarExpr(E: D.getNumIterations()); |
6708 | NumIterations = CGF.Builder.CreateIntCast(V: NumIterations, DestTy: CGF.Int64Ty, |
6709 | /*isSigned=*/false); |
6710 | return NumIterations; |
6711 | } |
6712 | return nullptr; |
6713 | }; |
6714 | CGM.getOpenMPRuntime().emitTargetCall(CGF, D: S, OutlinedFn: Fn, OutlinedFnID: FnID, IfCond, Device, |
6715 | SizeEmitter); |
6716 | } |
6717 | |
6718 | static void emitTargetRegion(CodeGenFunction &CGF, const OMPTargetDirective &S, |
6719 | PrePostActionTy &Action) { |
6720 | Action.Enter(CGF); |
6721 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6722 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6723 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6724 | (void)PrivateScope.Privatize(); |
6725 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6726 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6727 | |
6728 | CGF.EmitStmt(S.getCapturedStmt(OMPD_target)->getCapturedStmt()); |
6729 | CGF.EnsureInsertPoint(); |
6730 | } |
6731 | |
6732 | void CodeGenFunction::EmitOMPTargetDeviceFunction(CodeGenModule &CGM, |
6733 | StringRef ParentName, |
6734 | const OMPTargetDirective &S) { |
6735 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6736 | emitTargetRegion(CGF, S, Action); |
6737 | }; |
6738 | llvm::Function *Fn; |
6739 | llvm::Constant *Addr; |
6740 | // Emit target region as a standalone region. |
6741 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6742 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6743 | assert(Fn && Addr && "Target device function emission failed." ); |
6744 | } |
6745 | |
6746 | void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &S) { |
6747 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6748 | emitTargetRegion(CGF, S, Action); |
6749 | }; |
6750 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6751 | } |
6752 | |
6753 | static void emitCommonOMPTeamsDirective(CodeGenFunction &CGF, |
6754 | const OMPExecutableDirective &S, |
6755 | OpenMPDirectiveKind InnermostKind, |
6756 | const RegionCodeGenTy &CodeGen) { |
6757 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_teams); |
6758 | llvm::Function *OutlinedFn = |
6759 | CGF.CGM.getOpenMPRuntime().emitTeamsOutlinedFunction( |
6760 | CGF, S, *CS->getCapturedDecl()->param_begin(), InnermostKind, |
6761 | CodeGen); |
6762 | |
6763 | const auto *NT = S.getSingleClause<OMPNumTeamsClause>(); |
6764 | const auto *TL = S.getSingleClause<OMPThreadLimitClause>(); |
6765 | if (NT || TL) { |
6766 | const Expr *NumTeams = NT ? NT->getNumTeams() : nullptr; |
6767 | const Expr *ThreadLimit = TL ? TL->getThreadLimit() : nullptr; |
6768 | |
6769 | CGF.CGM.getOpenMPRuntime().emitNumTeamsClause(CGF, NumTeams, ThreadLimit, |
6770 | Loc: S.getBeginLoc()); |
6771 | } |
6772 | |
6773 | OMPTeamsScope Scope(CGF, S); |
6774 | llvm::SmallVector<llvm::Value *, 16> CapturedVars; |
6775 | CGF.GenerateOpenMPCapturedVars(S: *CS, CapturedVars); |
6776 | CGF.CGM.getOpenMPRuntime().emitTeamsCall(CGF, D: S, Loc: S.getBeginLoc(), OutlinedFn, |
6777 | CapturedVars); |
6778 | } |
6779 | |
6780 | void CodeGenFunction::EmitOMPTeamsDirective(const OMPTeamsDirective &S) { |
6781 | // Emit teams region as a standalone region. |
6782 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6783 | Action.Enter(CGF); |
6784 | OMPPrivateScope PrivateScope(CGF); |
6785 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6786 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6787 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6788 | (void)PrivateScope.Privatize(); |
6789 | CGF.EmitStmt(S.getCapturedStmt(OMPD_teams)->getCapturedStmt()); |
6790 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6791 | }; |
6792 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6793 | emitPostUpdateForReductionClause(*this, S, |
6794 | [](CodeGenFunction &) { return nullptr; }); |
6795 | } |
6796 | |
6797 | static void emitTargetTeamsRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6798 | const OMPTargetTeamsDirective &S) { |
6799 | auto *CS = S.getCapturedStmt(OMPD_teams); |
6800 | Action.Enter(CGF); |
6801 | // Emit teams region as a standalone region. |
6802 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6803 | Action.Enter(CGF); |
6804 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6805 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
6806 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
6807 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6808 | (void)PrivateScope.Privatize(); |
6809 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
6810 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
6811 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
6812 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6813 | }; |
6814 | emitCommonOMPTeamsDirective(CGF, S, OMPD_teams, CodeGen); |
6815 | emitPostUpdateForReductionClause(CGF, S, |
6816 | [](CodeGenFunction &) { return nullptr; }); |
6817 | } |
6818 | |
6819 | void CodeGenFunction::EmitOMPTargetTeamsDeviceFunction( |
6820 | CodeGenModule &CGM, StringRef ParentName, |
6821 | const OMPTargetTeamsDirective &S) { |
6822 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6823 | emitTargetTeamsRegion(CGF, Action, S); |
6824 | }; |
6825 | llvm::Function *Fn; |
6826 | llvm::Constant *Addr; |
6827 | // Emit target region as a standalone region. |
6828 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6829 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6830 | assert(Fn && Addr && "Target device function emission failed." ); |
6831 | } |
6832 | |
6833 | void CodeGenFunction::EmitOMPTargetTeamsDirective( |
6834 | const OMPTargetTeamsDirective &S) { |
6835 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6836 | emitTargetTeamsRegion(CGF, Action, S); |
6837 | }; |
6838 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6839 | } |
6840 | |
6841 | static void |
6842 | emitTargetTeamsDistributeRegion(CodeGenFunction &CGF, PrePostActionTy &Action, |
6843 | const OMPTargetTeamsDistributeDirective &S) { |
6844 | Action.Enter(CGF); |
6845 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6846 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6847 | }; |
6848 | |
6849 | // Emit teams region as a standalone region. |
6850 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6851 | PrePostActionTy &Action) { |
6852 | Action.Enter(CGF); |
6853 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6854 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6855 | (void)PrivateScope.Privatize(); |
6856 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6857 | CodeGenDistribute); |
6858 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6859 | }; |
6860 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); |
6861 | emitPostUpdateForReductionClause(CGF, S, |
6862 | [](CodeGenFunction &) { return nullptr; }); |
6863 | } |
6864 | |
6865 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction( |
6866 | CodeGenModule &CGM, StringRef ParentName, |
6867 | const OMPTargetTeamsDistributeDirective &S) { |
6868 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6869 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6870 | }; |
6871 | llvm::Function *Fn; |
6872 | llvm::Constant *Addr; |
6873 | // Emit target region as a standalone region. |
6874 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6875 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6876 | assert(Fn && Addr && "Target device function emission failed." ); |
6877 | } |
6878 | |
6879 | void CodeGenFunction::EmitOMPTargetTeamsDistributeDirective( |
6880 | const OMPTargetTeamsDistributeDirective &S) { |
6881 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6882 | emitTargetTeamsDistributeRegion(CGF, Action, S); |
6883 | }; |
6884 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6885 | } |
6886 | |
6887 | static void emitTargetTeamsDistributeSimdRegion( |
6888 | CodeGenFunction &CGF, PrePostActionTy &Action, |
6889 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6890 | Action.Enter(CGF); |
6891 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6892 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6893 | }; |
6894 | |
6895 | // Emit teams region as a standalone region. |
6896 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6897 | PrePostActionTy &Action) { |
6898 | Action.Enter(CGF); |
6899 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
6900 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6901 | (void)PrivateScope.Privatize(); |
6902 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6903 | CodeGenDistribute); |
6904 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6905 | }; |
6906 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_simd, CodeGen); |
6907 | emitPostUpdateForReductionClause(CGF, S, |
6908 | [](CodeGenFunction &) { return nullptr; }); |
6909 | } |
6910 | |
6911 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction( |
6912 | CodeGenModule &CGM, StringRef ParentName, |
6913 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6914 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6915 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6916 | }; |
6917 | llvm::Function *Fn; |
6918 | llvm::Constant *Addr; |
6919 | // Emit target region as a standalone region. |
6920 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
6921 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
6922 | assert(Fn && Addr && "Target device function emission failed." ); |
6923 | } |
6924 | |
6925 | void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective( |
6926 | const OMPTargetTeamsDistributeSimdDirective &S) { |
6927 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
6928 | emitTargetTeamsDistributeSimdRegion(CGF, Action, S); |
6929 | }; |
6930 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
6931 | } |
6932 | |
6933 | void CodeGenFunction::EmitOMPTeamsDistributeDirective( |
6934 | const OMPTeamsDistributeDirective &S) { |
6935 | |
6936 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6937 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6938 | }; |
6939 | |
6940 | // Emit teams region as a standalone region. |
6941 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6942 | PrePostActionTy &Action) { |
6943 | Action.Enter(CGF); |
6944 | OMPPrivateScope PrivateScope(CGF); |
6945 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6946 | (void)PrivateScope.Privatize(); |
6947 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6948 | CodeGenDistribute); |
6949 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6950 | }; |
6951 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
6952 | emitPostUpdateForReductionClause(*this, S, |
6953 | [](CodeGenFunction &) { return nullptr; }); |
6954 | } |
6955 | |
6956 | void CodeGenFunction::EmitOMPTeamsDistributeSimdDirective( |
6957 | const OMPTeamsDistributeSimdDirective &S) { |
6958 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6959 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
6960 | }; |
6961 | |
6962 | // Emit teams region as a standalone region. |
6963 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6964 | PrePostActionTy &Action) { |
6965 | Action.Enter(CGF); |
6966 | OMPPrivateScope PrivateScope(CGF); |
6967 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6968 | (void)PrivateScope.Privatize(); |
6969 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_simd, |
6970 | CodeGenDistribute); |
6971 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6972 | }; |
6973 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_simd, CodeGen); |
6974 | emitPostUpdateForReductionClause(*this, S, |
6975 | [](CodeGenFunction &) { return nullptr; }); |
6976 | } |
6977 | |
6978 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForDirective( |
6979 | const OMPTeamsDistributeParallelForDirective &S) { |
6980 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
6981 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
6982 | IncExpr: S.getDistInc()); |
6983 | }; |
6984 | |
6985 | // Emit teams region as a standalone region. |
6986 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
6987 | PrePostActionTy &Action) { |
6988 | Action.Enter(CGF); |
6989 | OMPPrivateScope PrivateScope(CGF); |
6990 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
6991 | (void)PrivateScope.Privatize(); |
6992 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
6993 | CodeGenDistribute); |
6994 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
6995 | }; |
6996 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for, CodeGen); |
6997 | emitPostUpdateForReductionClause(*this, S, |
6998 | [](CodeGenFunction &) { return nullptr; }); |
6999 | } |
7000 | |
7001 | void CodeGenFunction::EmitOMPTeamsDistributeParallelForSimdDirective( |
7002 | const OMPTeamsDistributeParallelForSimdDirective &S) { |
7003 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7004 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7005 | IncExpr: S.getDistInc()); |
7006 | }; |
7007 | |
7008 | // Emit teams region as a standalone region. |
7009 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7010 | PrePostActionTy &Action) { |
7011 | Action.Enter(CGF); |
7012 | OMPPrivateScope PrivateScope(CGF); |
7013 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7014 | (void)PrivateScope.Privatize(); |
7015 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7016 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7017 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7018 | }; |
7019 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute_parallel_for_simd, |
7020 | CodeGen); |
7021 | emitPostUpdateForReductionClause(*this, S, |
7022 | [](CodeGenFunction &) { return nullptr; }); |
7023 | } |
7024 | |
7025 | void CodeGenFunction::EmitOMPInteropDirective(const OMPInteropDirective &S) { |
7026 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7027 | llvm::Value *Device = nullptr; |
7028 | llvm::Value *NumDependences = nullptr; |
7029 | llvm::Value *DependenceList = nullptr; |
7030 | |
7031 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7032 | Device = EmitScalarExpr(E: C->getDevice()); |
7033 | |
7034 | // Build list and emit dependences |
7035 | OMPTaskDataTy Data; |
7036 | buildDependences(S, Data); |
7037 | if (!Data.Dependences.empty()) { |
7038 | Address DependenciesArray = Address::invalid(); |
7039 | std::tie(args&: NumDependences, args&: DependenciesArray) = |
7040 | CGM.getOpenMPRuntime().emitDependClause(CGF&: *this, Dependencies: Data.Dependences, |
7041 | Loc: S.getBeginLoc()); |
7042 | DependenceList = DependenciesArray.emitRawPointer(CGF&: *this); |
7043 | } |
7044 | Data.HasNowaitClause = S.hasClausesOfKind<OMPNowaitClause>(); |
7045 | |
7046 | assert(!(Data.HasNowaitClause && !(S.getSingleClause<OMPInitClause>() || |
7047 | S.getSingleClause<OMPDestroyClause>() || |
7048 | S.getSingleClause<OMPUseClause>())) && |
7049 | "OMPNowaitClause clause is used separately in OMPInteropDirective." ); |
7050 | |
7051 | auto ItOMPInitClause = S.getClausesOfKind<OMPInitClause>(); |
7052 | if (!ItOMPInitClause.empty()) { |
7053 | // Look at the multiple init clauses |
7054 | for (const OMPInitClause *C : ItOMPInitClause) { |
7055 | llvm::Value *InteropvarPtr = |
7056 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7057 | llvm::omp::OMPInteropType InteropType = |
7058 | llvm::omp::OMPInteropType::Unknown; |
7059 | if (C->getIsTarget()) { |
7060 | InteropType = llvm::omp::OMPInteropType::Target; |
7061 | } else { |
7062 | assert(C->getIsTargetSync() && |
7063 | "Expected interop-type target/targetsync" ); |
7064 | InteropType = llvm::omp::OMPInteropType::TargetSync; |
7065 | } |
7066 | OMPBuilder.createOMPInteropInit(Builder, InteropvarPtr, InteropType, |
7067 | Device, NumDependences, DependenceList, |
7068 | Data.HasNowaitClause); |
7069 | } |
7070 | } |
7071 | auto ItOMPDestroyClause = S.getClausesOfKind<OMPDestroyClause>(); |
7072 | if (!ItOMPDestroyClause.empty()) { |
7073 | // Look at the multiple destroy clauses |
7074 | for (const OMPDestroyClause *C : ItOMPDestroyClause) { |
7075 | llvm::Value *InteropvarPtr = |
7076 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7077 | OMPBuilder.createOMPInteropDestroy(Builder, InteropvarPtr, Device, |
7078 | NumDependences, DependenceList, |
7079 | Data.HasNowaitClause); |
7080 | } |
7081 | } |
7082 | auto ItOMPUseClause = S.getClausesOfKind<OMPUseClause>(); |
7083 | if (!ItOMPUseClause.empty()) { |
7084 | // Look at the multiple use clauses |
7085 | for (const OMPUseClause *C : ItOMPUseClause) { |
7086 | llvm::Value *InteropvarPtr = |
7087 | EmitLValue(C->getInteropVar()).getPointer(*this); |
7088 | OMPBuilder.createOMPInteropUse(Builder, InteropvarPtr, Device, |
7089 | NumDependences, DependenceList, |
7090 | Data.HasNowaitClause); |
7091 | } |
7092 | } |
7093 | } |
7094 | |
7095 | static void emitTargetTeamsDistributeParallelForRegion( |
7096 | CodeGenFunction &CGF, const OMPTargetTeamsDistributeParallelForDirective &S, |
7097 | PrePostActionTy &Action) { |
7098 | Action.Enter(CGF); |
7099 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7100 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7101 | IncExpr: S.getDistInc()); |
7102 | }; |
7103 | |
7104 | // Emit teams region as a standalone region. |
7105 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7106 | PrePostActionTy &Action) { |
7107 | Action.Enter(CGF); |
7108 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7109 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7110 | (void)PrivateScope.Privatize(); |
7111 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7112 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7113 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7114 | }; |
7115 | |
7116 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
7117 | CodeGenTeams); |
7118 | emitPostUpdateForReductionClause(CGF, S, |
7119 | [](CodeGenFunction &) { return nullptr; }); |
7120 | } |
7121 | |
7122 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction( |
7123 | CodeGenModule &CGM, StringRef ParentName, |
7124 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7125 | // Emit SPMD target teams distribute parallel for region as a standalone |
7126 | // region. |
7127 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7128 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7129 | }; |
7130 | llvm::Function *Fn; |
7131 | llvm::Constant *Addr; |
7132 | // Emit target region as a standalone region. |
7133 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7134 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7135 | assert(Fn && Addr && "Target device function emission failed." ); |
7136 | } |
7137 | |
7138 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDirective( |
7139 | const OMPTargetTeamsDistributeParallelForDirective &S) { |
7140 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7141 | emitTargetTeamsDistributeParallelForRegion(CGF, S, Action); |
7142 | }; |
7143 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7144 | } |
7145 | |
7146 | static void emitTargetTeamsDistributeParallelForSimdRegion( |
7147 | CodeGenFunction &CGF, |
7148 | const OMPTargetTeamsDistributeParallelForSimdDirective &S, |
7149 | PrePostActionTy &Action) { |
7150 | Action.Enter(CGF); |
7151 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7152 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7153 | IncExpr: S.getDistInc()); |
7154 | }; |
7155 | |
7156 | // Emit teams region as a standalone region. |
7157 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7158 | PrePostActionTy &Action) { |
7159 | Action.Enter(CGF); |
7160 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7161 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7162 | (void)PrivateScope.Privatize(); |
7163 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7164 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7165 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7166 | }; |
7167 | |
7168 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for_simd, |
7169 | CodeGenTeams); |
7170 | emitPostUpdateForReductionClause(CGF, S, |
7171 | [](CodeGenFunction &) { return nullptr; }); |
7172 | } |
7173 | |
7174 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction( |
7175 | CodeGenModule &CGM, StringRef ParentName, |
7176 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7177 | // Emit SPMD target teams distribute parallel for simd region as a standalone |
7178 | // region. |
7179 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7180 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7181 | }; |
7182 | llvm::Function *Fn; |
7183 | llvm::Constant *Addr; |
7184 | // Emit target region as a standalone region. |
7185 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7186 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7187 | assert(Fn && Addr && "Target device function emission failed." ); |
7188 | } |
7189 | |
7190 | void CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForSimdDirective( |
7191 | const OMPTargetTeamsDistributeParallelForSimdDirective &S) { |
7192 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7193 | emitTargetTeamsDistributeParallelForSimdRegion(CGF, S, Action); |
7194 | }; |
7195 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7196 | } |
7197 | |
7198 | void CodeGenFunction::EmitOMPCancellationPointDirective( |
7199 | const OMPCancellationPointDirective &S) { |
7200 | CGM.getOpenMPRuntime().emitCancellationPointCall(*this, S.getBeginLoc(), |
7201 | S.getCancelRegion()); |
7202 | } |
7203 | |
7204 | void CodeGenFunction::EmitOMPCancelDirective(const OMPCancelDirective &S) { |
7205 | const Expr *IfCond = nullptr; |
7206 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7207 | if (C->getNameModifier() == OMPD_unknown || |
7208 | C->getNameModifier() == OMPD_cancel) { |
7209 | IfCond = C->getCondition(); |
7210 | break; |
7211 | } |
7212 | } |
7213 | if (CGM.getLangOpts().OpenMPIRBuilder) { |
7214 | llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder(); |
7215 | // TODO: This check is necessary as we only generate `omp parallel` through |
7216 | // the OpenMPIRBuilder for now. |
7217 | if (S.getCancelRegion() == OMPD_parallel || |
7218 | S.getCancelRegion() == OMPD_sections || |
7219 | S.getCancelRegion() == OMPD_section) { |
7220 | llvm::Value *IfCondition = nullptr; |
7221 | if (IfCond) |
7222 | IfCondition = EmitScalarExpr(E: IfCond, |
7223 | /*IgnoreResultAssign=*/true); |
7224 | return Builder.restoreIP( |
7225 | IP: OMPBuilder.createCancel(Builder, IfCondition, S.getCancelRegion())); |
7226 | } |
7227 | } |
7228 | |
7229 | CGM.getOpenMPRuntime().emitCancelCall(*this, S.getBeginLoc(), IfCond, |
7230 | S.getCancelRegion()); |
7231 | } |
7232 | |
7233 | CodeGenFunction::JumpDest |
7234 | CodeGenFunction::getOMPCancelDestination(OpenMPDirectiveKind Kind) { |
7235 | if (Kind == OMPD_parallel || Kind == OMPD_task || |
7236 | Kind == OMPD_target_parallel || Kind == OMPD_taskloop || |
7237 | Kind == OMPD_master_taskloop || Kind == OMPD_parallel_master_taskloop) |
7238 | return ReturnBlock; |
7239 | assert(Kind == OMPD_for || Kind == OMPD_section || Kind == OMPD_sections || |
7240 | Kind == OMPD_parallel_sections || Kind == OMPD_parallel_for || |
7241 | Kind == OMPD_distribute_parallel_for || |
7242 | Kind == OMPD_target_parallel_for || |
7243 | Kind == OMPD_teams_distribute_parallel_for || |
7244 | Kind == OMPD_target_teams_distribute_parallel_for); |
7245 | return OMPCancelStack.getExitBlock(); |
7246 | } |
7247 | |
7248 | void CodeGenFunction::EmitOMPUseDevicePtrClause( |
7249 | const OMPUseDevicePtrClause &C, OMPPrivateScope &PrivateScope, |
7250 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7251 | CaptureDeviceAddrMap) { |
7252 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7253 | for (const Expr *OrigVarIt : C.varlists()) { |
7254 | const auto *OrigVD = cast<VarDecl>(cast<DeclRefExpr>(OrigVarIt)->getDecl()); |
7255 | if (!Processed.insert(OrigVD).second) |
7256 | continue; |
7257 | |
7258 | // In order to identify the right initializer we need to match the |
7259 | // declaration used by the mapping logic. In some cases we may get |
7260 | // OMPCapturedExprDecl that refers to the original declaration. |
7261 | const ValueDecl *MatchingVD = OrigVD; |
7262 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { |
7263 | // OMPCapturedExprDecl are used to privative fields of the current |
7264 | // structure. |
7265 | const auto *ME = cast<MemberExpr>(OED->getInit()); |
7266 | assert(isa<CXXThisExpr>(ME->getBase()->IgnoreImpCasts()) && |
7267 | "Base should be the current struct!" ); |
7268 | MatchingVD = ME->getMemberDecl(); |
7269 | } |
7270 | |
7271 | // If we don't have information about the current list item, move on to |
7272 | // the next one. |
7273 | auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); |
7274 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7275 | continue; |
7276 | |
7277 | llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); |
7278 | |
7279 | // Return the address of the private variable. |
7280 | bool IsRegistered = PrivateScope.addPrivate( |
7281 | OrigVD, |
7282 | Address(InitAddrIt->second, Ty, |
7283 | getContext().getTypeAlignInChars(getContext().VoidPtrTy))); |
7284 | assert(IsRegistered && "firstprivate var already registered as private" ); |
7285 | // Silence the warning about unused variable. |
7286 | (void)IsRegistered; |
7287 | } |
7288 | } |
7289 | |
7290 | static const VarDecl *getBaseDecl(const Expr *Ref) { |
7291 | const Expr *Base = Ref->IgnoreParenImpCasts(); |
7292 | while (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Val: Base)) |
7293 | Base = OASE->getBase()->IgnoreParenImpCasts(); |
7294 | while (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Val: Base)) |
7295 | Base = ASE->getBase()->IgnoreParenImpCasts(); |
7296 | return cast<VarDecl>(Val: cast<DeclRefExpr>(Val: Base)->getDecl()); |
7297 | } |
7298 | |
7299 | void CodeGenFunction::EmitOMPUseDeviceAddrClause( |
7300 | const OMPUseDeviceAddrClause &C, OMPPrivateScope &PrivateScope, |
7301 | const llvm::DenseMap<const ValueDecl *, llvm::Value *> |
7302 | CaptureDeviceAddrMap) { |
7303 | llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed; |
7304 | for (const Expr *Ref : C.varlists()) { |
7305 | const VarDecl *OrigVD = getBaseDecl(Ref); |
7306 | if (!Processed.insert(OrigVD).second) |
7307 | continue; |
7308 | // In order to identify the right initializer we need to match the |
7309 | // declaration used by the mapping logic. In some cases we may get |
7310 | // OMPCapturedExprDecl that refers to the original declaration. |
7311 | const ValueDecl *MatchingVD = OrigVD; |
7312 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(MatchingVD)) { |
7313 | // OMPCapturedExprDecl are used to privative fields of the current |
7314 | // structure. |
7315 | const auto *ME = cast<MemberExpr>(OED->getInit()); |
7316 | assert(isa<CXXThisExpr>(ME->getBase()) && |
7317 | "Base should be the current struct!" ); |
7318 | MatchingVD = ME->getMemberDecl(); |
7319 | } |
7320 | |
7321 | // If we don't have information about the current list item, move on to |
7322 | // the next one. |
7323 | auto InitAddrIt = CaptureDeviceAddrMap.find(MatchingVD); |
7324 | if (InitAddrIt == CaptureDeviceAddrMap.end()) |
7325 | continue; |
7326 | |
7327 | llvm::Type *Ty = ConvertTypeForMem(OrigVD->getType().getNonReferenceType()); |
7328 | |
7329 | Address PrivAddr = |
7330 | Address(InitAddrIt->second, Ty, |
7331 | getContext().getTypeAlignInChars(getContext().VoidPtrTy)); |
7332 | // For declrefs and variable length array need to load the pointer for |
7333 | // correct mapping, since the pointer to the data was passed to the runtime. |
7334 | if (isa<DeclRefExpr>(Ref->IgnoreParenImpCasts()) || |
7335 | MatchingVD->getType()->isArrayType()) { |
7336 | QualType PtrTy = getContext().getPointerType( |
7337 | OrigVD->getType().getNonReferenceType()); |
7338 | PrivAddr = |
7339 | EmitLoadOfPointer(PrivAddr.withElementType(ConvertTypeForMem(PtrTy)), |
7340 | PtrTy->castAs<PointerType>()); |
7341 | } |
7342 | |
7343 | (void)PrivateScope.addPrivate(OrigVD, PrivAddr); |
7344 | } |
7345 | } |
7346 | |
7347 | // Generate the instructions for '#pragma omp target data' directive. |
7348 | void CodeGenFunction::EmitOMPTargetDataDirective( |
7349 | const OMPTargetDataDirective &S) { |
7350 | CGOpenMPRuntime::TargetDataInfo Info(/*RequiresDevicePointerInfo=*/true, |
7351 | /*SeparateBeginEndCalls=*/true); |
7352 | |
7353 | // Create a pre/post action to signal the privatization of the device pointer. |
7354 | // This action can be replaced by the OpenMP runtime code generation to |
7355 | // deactivate privatization. |
7356 | bool PrivatizeDevicePointers = false; |
7357 | class DevicePointerPrivActionTy : public PrePostActionTy { |
7358 | bool &PrivatizeDevicePointers; |
7359 | |
7360 | public: |
7361 | explicit DevicePointerPrivActionTy(bool &PrivatizeDevicePointers) |
7362 | : PrivatizeDevicePointers(PrivatizeDevicePointers) {} |
7363 | void Enter(CodeGenFunction &CGF) override { |
7364 | PrivatizeDevicePointers = true; |
7365 | } |
7366 | }; |
7367 | DevicePointerPrivActionTy PrivAction(PrivatizeDevicePointers); |
7368 | |
7369 | auto &&CodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7370 | auto &&InnermostCodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7371 | CGF.EmitStmt(S: S.getInnermostCapturedStmt()->getCapturedStmt()); |
7372 | }; |
7373 | |
7374 | // Codegen that selects whether to generate the privatization code or not. |
7375 | auto &&PrivCodeGen = [&](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7376 | RegionCodeGenTy RCG(InnermostCodeGen); |
7377 | PrivatizeDevicePointers = false; |
7378 | |
7379 | // Call the pre-action to change the status of PrivatizeDevicePointers if |
7380 | // needed. |
7381 | Action.Enter(CGF); |
7382 | |
7383 | if (PrivatizeDevicePointers) { |
7384 | OMPPrivateScope PrivateScope(CGF); |
7385 | // Emit all instances of the use_device_ptr clause. |
7386 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7387 | CGF.EmitOMPUseDevicePtrClause(*C, PrivateScope, |
7388 | Info.CaptureDeviceAddrMap); |
7389 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7390 | CGF.EmitOMPUseDeviceAddrClause(*C, PrivateScope, |
7391 | Info.CaptureDeviceAddrMap); |
7392 | (void)PrivateScope.Privatize(); |
7393 | RCG(CGF); |
7394 | } else { |
7395 | // If we don't have target devices, don't bother emitting the data |
7396 | // mapping code. |
7397 | std::optional<OpenMPDirectiveKind> CaptureRegion; |
7398 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7399 | // Emit helper decls of the use_device_ptr/use_device_addr clauses. |
7400 | for (const auto *C : S.getClausesOfKind<OMPUseDevicePtrClause>()) |
7401 | for (const Expr *E : C->varlists()) { |
7402 | const Decl *D = cast<DeclRefExpr>(E)->getDecl(); |
7403 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
7404 | CGF.EmitVarDecl(*OED); |
7405 | } |
7406 | for (const auto *C : S.getClausesOfKind<OMPUseDeviceAddrClause>()) |
7407 | for (const Expr *E : C->varlists()) { |
7408 | const Decl *D = getBaseDecl(E); |
7409 | if (const auto *OED = dyn_cast<OMPCapturedExprDecl>(D)) |
7410 | CGF.EmitVarDecl(*OED); |
7411 | } |
7412 | } else { |
7413 | CaptureRegion = OMPD_unknown; |
7414 | } |
7415 | |
7416 | OMPLexicalScope Scope(CGF, S, CaptureRegion); |
7417 | RCG(CGF); |
7418 | } |
7419 | }; |
7420 | |
7421 | // Forward the provided action to the privatization codegen. |
7422 | RegionCodeGenTy PrivRCG(PrivCodeGen); |
7423 | PrivRCG.setAction(Action); |
7424 | |
7425 | // Notwithstanding the body of the region is emitted as inlined directive, |
7426 | // we don't use an inline scope as changes in the references inside the |
7427 | // region are expected to be visible outside, so we do not privative them. |
7428 | OMPLexicalScope Scope(CGF, S); |
7429 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_target_data, |
7430 | PrivRCG); |
7431 | }; |
7432 | |
7433 | RegionCodeGenTy RCG(CodeGen); |
7434 | |
7435 | // If we don't have target devices, don't bother emitting the data mapping |
7436 | // code. |
7437 | if (CGM.getLangOpts().OMPTargetTriples.empty()) { |
7438 | RCG(*this); |
7439 | return; |
7440 | } |
7441 | |
7442 | // Check if we have any if clause associated with the directive. |
7443 | const Expr *IfCond = nullptr; |
7444 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7445 | IfCond = C->getCondition(); |
7446 | |
7447 | // Check if we have any device clause associated with the directive. |
7448 | const Expr *Device = nullptr; |
7449 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7450 | Device = C->getDevice(); |
7451 | |
7452 | // Set the action to signal privatization of device pointers. |
7453 | RCG.setAction(PrivAction); |
7454 | |
7455 | // Emit region code. |
7456 | CGM.getOpenMPRuntime().emitTargetDataCalls(*this, S, IfCond, Device, RCG, |
7457 | Info); |
7458 | } |
7459 | |
7460 | void CodeGenFunction::EmitOMPTargetEnterDataDirective( |
7461 | const OMPTargetEnterDataDirective &S) { |
7462 | // If we don't have target devices, don't bother emitting the data mapping |
7463 | // code. |
7464 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7465 | return; |
7466 | |
7467 | // Check if we have any if clause associated with the directive. |
7468 | const Expr *IfCond = nullptr; |
7469 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7470 | IfCond = C->getCondition(); |
7471 | |
7472 | // Check if we have any device clause associated with the directive. |
7473 | const Expr *Device = nullptr; |
7474 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7475 | Device = C->getDevice(); |
7476 | |
7477 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7478 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7479 | } |
7480 | |
7481 | void CodeGenFunction::EmitOMPTargetExitDataDirective( |
7482 | const OMPTargetExitDataDirective &S) { |
7483 | // If we don't have target devices, don't bother emitting the data mapping |
7484 | // code. |
7485 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7486 | return; |
7487 | |
7488 | // Check if we have any if clause associated with the directive. |
7489 | const Expr *IfCond = nullptr; |
7490 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7491 | IfCond = C->getCondition(); |
7492 | |
7493 | // Check if we have any device clause associated with the directive. |
7494 | const Expr *Device = nullptr; |
7495 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7496 | Device = C->getDevice(); |
7497 | |
7498 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7499 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7500 | } |
7501 | |
7502 | static void emitTargetParallelRegion(CodeGenFunction &CGF, |
7503 | const OMPTargetParallelDirective &S, |
7504 | PrePostActionTy &Action) { |
7505 | // Get the captured statement associated with the 'parallel' region. |
7506 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel); |
7507 | Action.Enter(CGF); |
7508 | auto &&CodeGen = [&S, CS](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7509 | Action.Enter(CGF); |
7510 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7511 | (void)CGF.EmitOMPFirstprivateClause(S, PrivateScope); |
7512 | CGF.EmitOMPPrivateClause(S, PrivateScope); |
7513 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7514 | (void)PrivateScope.Privatize(); |
7515 | if (isOpenMPTargetExecutionDirective(S.getDirectiveKind())) |
7516 | CGF.CGM.getOpenMPRuntime().adjustTargetSpecificDataForLambdas(CGF, S); |
7517 | // TODO: Add support for clauses. |
7518 | CGF.EmitStmt(S: CS->getCapturedStmt()); |
7519 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel); |
7520 | }; |
7521 | emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen, |
7522 | emitEmptyBoundParameters); |
7523 | emitPostUpdateForReductionClause(CGF, S, |
7524 | [](CodeGenFunction &) { return nullptr; }); |
7525 | } |
7526 | |
7527 | void CodeGenFunction::EmitOMPTargetParallelDeviceFunction( |
7528 | CodeGenModule &CGM, StringRef ParentName, |
7529 | const OMPTargetParallelDirective &S) { |
7530 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7531 | emitTargetParallelRegion(CGF, S, Action); |
7532 | }; |
7533 | llvm::Function *Fn; |
7534 | llvm::Constant *Addr; |
7535 | // Emit target region as a standalone region. |
7536 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7537 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7538 | assert(Fn && Addr && "Target device function emission failed." ); |
7539 | } |
7540 | |
7541 | void CodeGenFunction::EmitOMPTargetParallelDirective( |
7542 | const OMPTargetParallelDirective &S) { |
7543 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7544 | emitTargetParallelRegion(CGF, S, Action); |
7545 | }; |
7546 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7547 | } |
7548 | |
7549 | static void emitTargetParallelForRegion(CodeGenFunction &CGF, |
7550 | const OMPTargetParallelForDirective &S, |
7551 | PrePostActionTy &Action) { |
7552 | Action.Enter(CGF); |
7553 | // Emit directive as a combined directive that consists of two implicit |
7554 | // directives: 'parallel' with 'for' directive. |
7555 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7556 | Action.Enter(CGF); |
7557 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
7558 | CGF, OMPD_target_parallel_for, S.hasCancel()); |
7559 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7560 | CGDispatchBounds: emitDispatchForLoopBounds); |
7561 | }; |
7562 | emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, |
7563 | emitEmptyBoundParameters); |
7564 | } |
7565 | |
7566 | void CodeGenFunction::EmitOMPTargetParallelForDeviceFunction( |
7567 | CodeGenModule &CGM, StringRef ParentName, |
7568 | const OMPTargetParallelForDirective &S) { |
7569 | // Emit SPMD target parallel for region as a standalone region. |
7570 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7571 | emitTargetParallelForRegion(CGF, S, Action); |
7572 | }; |
7573 | llvm::Function *Fn; |
7574 | llvm::Constant *Addr; |
7575 | // Emit target region as a standalone region. |
7576 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7577 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7578 | assert(Fn && Addr && "Target device function emission failed." ); |
7579 | } |
7580 | |
7581 | void CodeGenFunction::EmitOMPTargetParallelForDirective( |
7582 | const OMPTargetParallelForDirective &S) { |
7583 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7584 | emitTargetParallelForRegion(CGF, S, Action); |
7585 | }; |
7586 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7587 | } |
7588 | |
7589 | static void |
7590 | emitTargetParallelForSimdRegion(CodeGenFunction &CGF, |
7591 | const OMPTargetParallelForSimdDirective &S, |
7592 | PrePostActionTy &Action) { |
7593 | Action.Enter(CGF); |
7594 | // Emit directive as a combined directive that consists of two implicit |
7595 | // directives: 'parallel' with 'for' directive. |
7596 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7597 | Action.Enter(CGF); |
7598 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
7599 | CGDispatchBounds: emitDispatchForLoopBounds); |
7600 | }; |
7601 | emitCommonOMPParallelDirective(CGF, S, OMPD_simd, CodeGen, |
7602 | emitEmptyBoundParameters); |
7603 | } |
7604 | |
7605 | void CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction( |
7606 | CodeGenModule &CGM, StringRef ParentName, |
7607 | const OMPTargetParallelForSimdDirective &S) { |
7608 | // Emit SPMD target parallel for region as a standalone region. |
7609 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7610 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7611 | }; |
7612 | llvm::Function *Fn; |
7613 | llvm::Constant *Addr; |
7614 | // Emit target region as a standalone region. |
7615 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
7616 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
7617 | assert(Fn && Addr && "Target device function emission failed." ); |
7618 | } |
7619 | |
7620 | void CodeGenFunction::EmitOMPTargetParallelForSimdDirective( |
7621 | const OMPTargetParallelForSimdDirective &S) { |
7622 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7623 | emitTargetParallelForSimdRegion(CGF, S, Action); |
7624 | }; |
7625 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
7626 | } |
7627 | |
7628 | /// Emit a helper variable and return corresponding lvalue. |
7629 | static void mapParam(CodeGenFunction &CGF, const DeclRefExpr *Helper, |
7630 | const ImplicitParamDecl *PVD, |
7631 | CodeGenFunction::OMPPrivateScope &Privates) { |
7632 | const auto *VDecl = cast<VarDecl>(Val: Helper->getDecl()); |
7633 | Privates.addPrivate(LocalVD: VDecl, Addr: CGF.GetAddrOfLocalVar(PVD)); |
7634 | } |
7635 | |
7636 | void CodeGenFunction::EmitOMPTaskLoopBasedDirective(const OMPLoopDirective &S) { |
7637 | assert(isOpenMPTaskLoopDirective(S.getDirectiveKind())); |
7638 | // Emit outlined function for task construct. |
7639 | const CapturedStmt *CS = S.getCapturedStmt(OMPD_taskloop); |
7640 | Address CapturedStruct = Address::invalid(); |
7641 | { |
7642 | OMPLexicalScope Scope(*this, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7643 | CapturedStruct = GenerateCapturedStmtArgument(S: *CS); |
7644 | } |
7645 | QualType SharedsTy = getContext().getRecordType(Decl: CS->getCapturedRecordDecl()); |
7646 | const Expr *IfCond = nullptr; |
7647 | for (const auto *C : S.getClausesOfKind<OMPIfClause>()) { |
7648 | if (C->getNameModifier() == OMPD_unknown || |
7649 | C->getNameModifier() == OMPD_taskloop) { |
7650 | IfCond = C->getCondition(); |
7651 | break; |
7652 | } |
7653 | } |
7654 | |
7655 | OMPTaskDataTy Data; |
7656 | // Check if taskloop must be emitted without taskgroup. |
7657 | Data.Nogroup = S.getSingleClause<OMPNogroupClause>(); |
7658 | // TODO: Check if we should emit tied or untied task. |
7659 | Data.Tied = true; |
7660 | // Set scheduling for taskloop |
7661 | if (const auto *Clause = S.getSingleClause<OMPGrainsizeClause>()) { |
7662 | // grainsize clause |
7663 | Data.Schedule.setInt(/*IntVal=*/false); |
7664 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getGrainsize())); |
7665 | } else if (const auto *Clause = S.getSingleClause<OMPNumTasksClause>()) { |
7666 | // num_tasks clause |
7667 | Data.Schedule.setInt(/*IntVal=*/true); |
7668 | Data.Schedule.setPointer(EmitScalarExpr(E: Clause->getNumTasks())); |
7669 | } |
7670 | |
7671 | auto &&BodyGen = [CS, &S](CodeGenFunction &CGF, PrePostActionTy &) { |
7672 | // if (PreCond) { |
7673 | // for (IV in 0..LastIteration) BODY; |
7674 | // <Final counter/linear vars updates>; |
7675 | // } |
7676 | // |
7677 | |
7678 | // Emit: if (PreCond) - begin. |
7679 | // If the condition constant folds and can be elided, avoid emitting the |
7680 | // whole loop. |
7681 | bool CondConstant; |
7682 | llvm::BasicBlock *ContBlock = nullptr; |
7683 | OMPLoopScope PreInitScope(CGF, S); |
7684 | if (CGF.ConstantFoldsToSimpleInteger(Cond: S.getPreCond(), Result&: CondConstant)) { |
7685 | if (!CondConstant) |
7686 | return; |
7687 | } else { |
7688 | llvm::BasicBlock *ThenBlock = CGF.createBasicBlock(name: "taskloop.if.then" ); |
7689 | ContBlock = CGF.createBasicBlock(name: "taskloop.if.end" ); |
7690 | emitPreCond(CGF, S, Cond: S.getPreCond(), TrueBlock: ThenBlock, FalseBlock: ContBlock, |
7691 | TrueCount: CGF.getProfileCount(&S)); |
7692 | CGF.EmitBlock(BB: ThenBlock); |
7693 | CGF.incrementProfileCounter(&S); |
7694 | } |
7695 | |
7696 | (void)CGF.EmitOMPLinearClauseInit(D: S); |
7697 | |
7698 | OMPPrivateScope LoopScope(CGF); |
7699 | // Emit helper vars inits. |
7700 | enum { LowerBound = 5, UpperBound, Stride, LastIter }; |
7701 | auto *I = CS->getCapturedDecl()->param_begin(); |
7702 | auto *LBP = std::next(x: I, n: LowerBound); |
7703 | auto *UBP = std::next(x: I, n: UpperBound); |
7704 | auto *STP = std::next(x: I, n: Stride); |
7705 | auto *LIP = std::next(x: I, n: LastIter); |
7706 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getLowerBoundVariable()), PVD: *LBP, |
7707 | Privates&: LoopScope); |
7708 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getUpperBoundVariable()), PVD: *UBP, |
7709 | Privates&: LoopScope); |
7710 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getStrideVariable()), PVD: *STP, Privates&: LoopScope); |
7711 | mapParam(CGF, Helper: cast<DeclRefExpr>(Val: S.getIsLastIterVariable()), PVD: *LIP, |
7712 | Privates&: LoopScope); |
7713 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7714 | CGF.EmitOMPLinearClause(D: S, PrivateScope&: LoopScope); |
7715 | bool HasLastprivateClause = CGF.EmitOMPLastprivateClauseInit(S, LoopScope); |
7716 | (void)LoopScope.Privatize(); |
7717 | // Emit the loop iteration variable. |
7718 | const Expr *IVExpr = S.getIterationVariable(); |
7719 | const auto *IVDecl = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: IVExpr)->getDecl()); |
7720 | CGF.EmitVarDecl(D: *IVDecl); |
7721 | CGF.EmitIgnoredExpr(E: S.getInit()); |
7722 | |
7723 | // Emit the iterations count variable. |
7724 | // If it is not a variable, Sema decided to calculate iterations count on |
7725 | // each iteration (e.g., it is foldable into a constant). |
7726 | if (const auto *LIExpr = dyn_cast<DeclRefExpr>(Val: S.getLastIteration())) { |
7727 | CGF.EmitVarDecl(D: *cast<VarDecl>(Val: LIExpr->getDecl())); |
7728 | // Emit calculation of the iterations count. |
7729 | CGF.EmitIgnoredExpr(E: S.getCalcLastIteration()); |
7730 | } |
7731 | |
7732 | { |
7733 | OMPLexicalScope Scope(CGF, S, OMPD_taskloop, /*EmitPreInitStmt=*/false); |
7734 | emitCommonSimdLoop( |
7735 | CGF, S, |
7736 | SimdInitGen: [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7737 | if (isOpenMPSimdDirective(S.getDirectiveKind())) |
7738 | CGF.EmitOMPSimdInit(D: S); |
7739 | }, |
7740 | BodyCodeGen: [&S, &LoopScope](CodeGenFunction &CGF, PrePostActionTy &) { |
7741 | CGF.EmitOMPInnerLoop( |
7742 | S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(), |
7743 | [&S](CodeGenFunction &CGF) { |
7744 | emitOMPLoopBodyWithStopPoint(CGF, S, |
7745 | LoopExit: CodeGenFunction::JumpDest()); |
7746 | }, |
7747 | [](CodeGenFunction &) {}); |
7748 | }); |
7749 | } |
7750 | // Emit: if (PreCond) - end. |
7751 | if (ContBlock) { |
7752 | CGF.EmitBranch(Block: ContBlock); |
7753 | CGF.EmitBlock(BB: ContBlock, IsFinished: true); |
7754 | } |
7755 | // Emit final copy of the lastprivate variables if IsLastIter != 0. |
7756 | if (HasLastprivateClause) { |
7757 | CGF.EmitOMPLastprivateClauseFinal( |
7758 | S, isOpenMPSimdDirective(S.getDirectiveKind()), |
7759 | CGF.Builder.CreateIsNotNull(CGF.EmitLoadOfScalar( |
7760 | CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, |
7761 | (*LIP)->getType(), S.getBeginLoc()))); |
7762 | } |
7763 | LoopScope.restoreMap(); |
7764 | CGF.EmitOMPLinearClauseFinal(S, [LIP, &S](CodeGenFunction &CGF) { |
7765 | return CGF.Builder.CreateIsNotNull( |
7766 | CGF.EmitLoadOfScalar(CGF.GetAddrOfLocalVar(*LIP), /*Volatile=*/false, |
7767 | (*LIP)->getType(), S.getBeginLoc())); |
7768 | }); |
7769 | }; |
7770 | auto &&TaskGen = [&S, SharedsTy, CapturedStruct, |
7771 | IfCond](CodeGenFunction &CGF, llvm::Function *OutlinedFn, |
7772 | const OMPTaskDataTy &Data) { |
7773 | auto &&CodeGen = [&S, OutlinedFn, SharedsTy, CapturedStruct, IfCond, |
7774 | &Data](CodeGenFunction &CGF, PrePostActionTy &) { |
7775 | OMPLoopScope PreInitScope(CGF, S); |
7776 | CGF.CGM.getOpenMPRuntime().emitTaskLoopCall(CGF, Loc: S.getBeginLoc(), D: S, |
7777 | TaskFunction: OutlinedFn, SharedsTy, |
7778 | Shareds: CapturedStruct, IfCond, Data); |
7779 | }; |
7780 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_taskloop, |
7781 | CodeGen); |
7782 | }; |
7783 | if (Data.Nogroup) { |
7784 | EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, Data); |
7785 | } else { |
7786 | CGM.getOpenMPRuntime().emitTaskgroupRegion( |
7787 | CGF&: *this, |
7788 | TaskgroupOpGen: [&S, &BodyGen, &TaskGen, &Data](CodeGenFunction &CGF, |
7789 | PrePostActionTy &Action) { |
7790 | Action.Enter(CGF); |
7791 | CGF.EmitOMPTaskBasedDirective(S, OMPD_taskloop, BodyGen, TaskGen, |
7792 | Data); |
7793 | }, |
7794 | Loc: S.getBeginLoc()); |
7795 | } |
7796 | } |
7797 | |
7798 | void CodeGenFunction::EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S) { |
7799 | auto LPCRegion = |
7800 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7801 | EmitOMPTaskLoopBasedDirective(S); |
7802 | } |
7803 | |
7804 | void CodeGenFunction::EmitOMPTaskLoopSimdDirective( |
7805 | const OMPTaskLoopSimdDirective &S) { |
7806 | auto LPCRegion = |
7807 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7808 | OMPLexicalScope Scope(*this, S); |
7809 | EmitOMPTaskLoopBasedDirective(S); |
7810 | } |
7811 | |
7812 | void CodeGenFunction::EmitOMPMasterTaskLoopDirective( |
7813 | const OMPMasterTaskLoopDirective &S) { |
7814 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7815 | Action.Enter(CGF); |
7816 | EmitOMPTaskLoopBasedDirective(S); |
7817 | }; |
7818 | auto LPCRegion = |
7819 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7820 | OMPLexicalScope Scope(*this, S, std::nullopt, /*EmitPreInitStmt=*/false); |
7821 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
7822 | } |
7823 | |
7824 | void CodeGenFunction::EmitOMPMasterTaskLoopSimdDirective( |
7825 | const OMPMasterTaskLoopSimdDirective &S) { |
7826 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7827 | Action.Enter(CGF); |
7828 | EmitOMPTaskLoopBasedDirective(S); |
7829 | }; |
7830 | auto LPCRegion = |
7831 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7832 | OMPLexicalScope Scope(*this, S); |
7833 | CGM.getOpenMPRuntime().emitMasterRegion(CGF&: *this, MasterOpGen: CodeGen, Loc: S.getBeginLoc()); |
7834 | } |
7835 | |
7836 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopDirective( |
7837 | const OMPParallelMasterTaskLoopDirective &S) { |
7838 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7839 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7840 | PrePostActionTy &Action) { |
7841 | Action.Enter(CGF); |
7842 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7843 | }; |
7844 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7845 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
7846 | Loc: S.getBeginLoc()); |
7847 | }; |
7848 | auto LPCRegion = |
7849 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7850 | emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop, CodeGen, |
7851 | emitEmptyBoundParameters); |
7852 | } |
7853 | |
7854 | void CodeGenFunction::EmitOMPParallelMasterTaskLoopSimdDirective( |
7855 | const OMPParallelMasterTaskLoopSimdDirective &S) { |
7856 | auto &&CodeGen = [this, &S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7857 | auto &&TaskLoopCodeGen = [&S](CodeGenFunction &CGF, |
7858 | PrePostActionTy &Action) { |
7859 | Action.Enter(CGF); |
7860 | CGF.EmitOMPTaskLoopBasedDirective(S); |
7861 | }; |
7862 | OMPLexicalScope Scope(CGF, S, OMPD_parallel, /*EmitPreInitStmt=*/false); |
7863 | CGM.getOpenMPRuntime().emitMasterRegion(CGF, MasterOpGen: TaskLoopCodeGen, |
7864 | Loc: S.getBeginLoc()); |
7865 | }; |
7866 | auto LPCRegion = |
7867 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7868 | emitCommonOMPParallelDirective(*this, S, OMPD_master_taskloop_simd, CodeGen, |
7869 | emitEmptyBoundParameters); |
7870 | } |
7871 | |
7872 | // Generate the instructions for '#pragma omp target update' directive. |
7873 | void CodeGenFunction::EmitOMPTargetUpdateDirective( |
7874 | const OMPTargetUpdateDirective &S) { |
7875 | // If we don't have target devices, don't bother emitting the data mapping |
7876 | // code. |
7877 | if (CGM.getLangOpts().OMPTargetTriples.empty()) |
7878 | return; |
7879 | |
7880 | // Check if we have any if clause associated with the directive. |
7881 | const Expr *IfCond = nullptr; |
7882 | if (const auto *C = S.getSingleClause<OMPIfClause>()) |
7883 | IfCond = C->getCondition(); |
7884 | |
7885 | // Check if we have any device clause associated with the directive. |
7886 | const Expr *Device = nullptr; |
7887 | if (const auto *C = S.getSingleClause<OMPDeviceClause>()) |
7888 | Device = C->getDevice(); |
7889 | |
7890 | OMPLexicalScope Scope(*this, S, OMPD_task); |
7891 | CGM.getOpenMPRuntime().emitTargetDataStandAloneCall(*this, S, IfCond, Device); |
7892 | } |
7893 | |
7894 | void CodeGenFunction::EmitOMPGenericLoopDirective( |
7895 | const OMPGenericLoopDirective &S) { |
7896 | // Unimplemented, just inline the underlying statement for now. |
7897 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7898 | // Emit the loop iteration variable. |
7899 | const Stmt *CS = |
7900 | cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt(); |
7901 | const auto *ForS = dyn_cast<ForStmt>(Val: CS); |
7902 | if (ForS && !isa<DeclStmt>(ForS->getInit())) { |
7903 | OMPPrivateScope LoopScope(CGF); |
7904 | CGF.EmitOMPPrivateLoopCounters(S, LoopScope); |
7905 | (void)LoopScope.Privatize(); |
7906 | CGF.EmitStmt(S: CS); |
7907 | LoopScope.restoreMap(); |
7908 | } else { |
7909 | CGF.EmitStmt(S: CS); |
7910 | } |
7911 | }; |
7912 | OMPLexicalScope Scope(*this, S, OMPD_unknown); |
7913 | CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_loop, CodeGen); |
7914 | } |
7915 | |
7916 | void CodeGenFunction::EmitOMPParallelGenericLoopDirective( |
7917 | const OMPLoopDirective &S) { |
7918 | // Emit combined directive as if its consituent constructs are 'parallel' |
7919 | // and 'for'. |
7920 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
7921 | Action.Enter(CGF); |
7922 | emitOMPCopyinClause(CGF, S); |
7923 | (void)emitWorksharingDirective(CGF, S, /*HasCancel=*/false); |
7924 | }; |
7925 | { |
7926 | auto LPCRegion = |
7927 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(*this, S); |
7928 | emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen, |
7929 | emitEmptyBoundParameters); |
7930 | } |
7931 | // Check for outer lastprivate conditional update. |
7932 | checkForLastprivateConditionalUpdate(*this, S); |
7933 | } |
7934 | |
7935 | void CodeGenFunction::EmitOMPTeamsGenericLoopDirective( |
7936 | const OMPTeamsGenericLoopDirective &S) { |
7937 | // To be consistent with current behavior of 'target teams loop', emit |
7938 | // 'teams loop' as if its constituent constructs are 'teams' and 'distribute'. |
7939 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7940 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
7941 | }; |
7942 | |
7943 | // Emit teams region as a standalone region. |
7944 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7945 | PrePostActionTy &Action) { |
7946 | Action.Enter(CGF); |
7947 | OMPPrivateScope PrivateScope(CGF); |
7948 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7949 | (void)PrivateScope.Privatize(); |
7950 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective(CGF, OMPD_distribute, |
7951 | CodeGenDistribute); |
7952 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7953 | }; |
7954 | emitCommonOMPTeamsDirective(*this, S, OMPD_distribute, CodeGen); |
7955 | emitPostUpdateForReductionClause(*this, S, |
7956 | [](CodeGenFunction &) { return nullptr; }); |
7957 | } |
7958 | |
7959 | #ifndef NDEBUG |
7960 | static void emitTargetTeamsLoopCodegenStatus(CodeGenFunction &CGF, |
7961 | std::string StatusMsg, |
7962 | const OMPExecutableDirective &D) { |
7963 | bool IsDevice = CGF.CGM.getLangOpts().OpenMPIsTargetDevice; |
7964 | if (IsDevice) |
7965 | StatusMsg += ": DEVICE" ; |
7966 | else |
7967 | StatusMsg += ": HOST" ; |
7968 | SourceLocation L = D.getBeginLoc(); |
7969 | auto &SM = CGF.getContext().getSourceManager(); |
7970 | PresumedLoc PLoc = SM.getPresumedLoc(Loc: L); |
7971 | const char *FileName = PLoc.isValid() ? PLoc.getFilename() : nullptr; |
7972 | unsigned LineNo = |
7973 | PLoc.isValid() ? PLoc.getLine() : SM.getExpansionLineNumber(Loc: L); |
7974 | llvm::dbgs() << StatusMsg << ": " << FileName << ": " << LineNo << "\n" ; |
7975 | } |
7976 | #endif |
7977 | |
7978 | static void emitTargetTeamsGenericLoopRegionAsParallel( |
7979 | CodeGenFunction &CGF, PrePostActionTy &Action, |
7980 | const OMPTargetTeamsGenericLoopDirective &S) { |
7981 | Action.Enter(CGF); |
7982 | // Emit 'teams loop' as if its constituent constructs are 'distribute, |
7983 | // 'parallel, and 'for'. |
7984 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
7985 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitInnerParallelForWhenCombined, |
7986 | IncExpr: S.getDistInc()); |
7987 | }; |
7988 | |
7989 | // Emit teams region as a standalone region. |
7990 | auto &&CodeGenTeams = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
7991 | PrePostActionTy &Action) { |
7992 | Action.Enter(CGF); |
7993 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
7994 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
7995 | (void)PrivateScope.Privatize(); |
7996 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
7997 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
7998 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
7999 | }; |
8000 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8001 | emitTargetTeamsLoopCodegenStatus( |
8002 | CGF, TTL_CODEGEN_TYPE " as parallel for" , S)); |
8003 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute_parallel_for, |
8004 | CodeGenTeams); |
8005 | emitPostUpdateForReductionClause(CGF, S, |
8006 | [](CodeGenFunction &) { return nullptr; }); |
8007 | } |
8008 | |
8009 | static void emitTargetTeamsGenericLoopRegionAsDistribute( |
8010 | CodeGenFunction &CGF, PrePostActionTy &Action, |
8011 | const OMPTargetTeamsGenericLoopDirective &S) { |
8012 | Action.Enter(CGF); |
8013 | // Emit 'teams loop' as if its constituent construct is 'distribute'. |
8014 | auto &&CodeGenDistribute = [&S](CodeGenFunction &CGF, PrePostActionTy &) { |
8015 | CGF.EmitOMPDistributeLoop(S, CodeGenLoop: emitOMPLoopBodyWithStopPoint, IncExpr: S.getInc()); |
8016 | }; |
8017 | |
8018 | // Emit teams region as a standalone region. |
8019 | auto &&CodeGen = [&S, &CodeGenDistribute](CodeGenFunction &CGF, |
8020 | PrePostActionTy &Action) { |
8021 | Action.Enter(CGF); |
8022 | CodeGenFunction::OMPPrivateScope PrivateScope(CGF); |
8023 | CGF.EmitOMPReductionClauseInit(S, PrivateScope); |
8024 | (void)PrivateScope.Privatize(); |
8025 | CGF.CGM.getOpenMPRuntime().emitInlinedDirective( |
8026 | CGF, OMPD_distribute, CodeGenDistribute, /*HasCancel=*/false); |
8027 | CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_teams); |
8028 | }; |
8029 | DEBUG_WITH_TYPE(TTL_CODEGEN_TYPE, |
8030 | emitTargetTeamsLoopCodegenStatus( |
8031 | CGF, TTL_CODEGEN_TYPE " as distribute" , S)); |
8032 | emitCommonOMPTeamsDirective(CGF, S, OMPD_distribute, CodeGen); |
8033 | emitPostUpdateForReductionClause(CGF, S, |
8034 | [](CodeGenFunction &) { return nullptr; }); |
8035 | } |
8036 | |
8037 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDirective( |
8038 | const OMPTargetTeamsGenericLoopDirective &S) { |
8039 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8040 | if (S.canBeParallelFor()) |
8041 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8042 | else |
8043 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8044 | }; |
8045 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
8046 | } |
8047 | |
8048 | void CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction( |
8049 | CodeGenModule &CGM, StringRef ParentName, |
8050 | const OMPTargetTeamsGenericLoopDirective &S) { |
8051 | // Emit SPMD target parallel loop region as a standalone region. |
8052 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8053 | if (S.canBeParallelFor()) |
8054 | emitTargetTeamsGenericLoopRegionAsParallel(CGF, Action, S); |
8055 | else |
8056 | emitTargetTeamsGenericLoopRegionAsDistribute(CGF, Action, S); |
8057 | }; |
8058 | llvm::Function *Fn; |
8059 | llvm::Constant *Addr; |
8060 | // Emit target region as a standalone region. |
8061 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8062 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
8063 | assert(Fn && Addr && |
8064 | "Target device function emission failed for 'target teams loop'." ); |
8065 | } |
8066 | |
8067 | static void emitTargetParallelGenericLoopRegion( |
8068 | CodeGenFunction &CGF, const OMPTargetParallelGenericLoopDirective &S, |
8069 | PrePostActionTy &Action) { |
8070 | Action.Enter(CGF); |
8071 | // Emit as 'parallel for'. |
8072 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8073 | Action.Enter(CGF); |
8074 | CodeGenFunction::OMPCancelStackRAII CancelRegion( |
8075 | CGF, OMPD_target_parallel_loop, /*hasCancel=*/false); |
8076 | CGF.EmitOMPWorksharingLoop(S, EUB: S.getEnsureUpperBound(), CodeGenLoopBounds: emitForLoopBounds, |
8077 | CGDispatchBounds: emitDispatchForLoopBounds); |
8078 | }; |
8079 | emitCommonOMPParallelDirective(CGF, S, OMPD_for, CodeGen, |
8080 | emitEmptyBoundParameters); |
8081 | } |
8082 | |
8083 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction( |
8084 | CodeGenModule &CGM, StringRef ParentName, |
8085 | const OMPTargetParallelGenericLoopDirective &S) { |
8086 | // Emit target parallel loop region as a standalone region. |
8087 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8088 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8089 | }; |
8090 | llvm::Function *Fn; |
8091 | llvm::Constant *Addr; |
8092 | // Emit target region as a standalone region. |
8093 | CGM.getOpenMPRuntime().emitTargetOutlinedFunction( |
8094 | S, ParentName, Fn, Addr, /*IsOffloadEntry=*/true, CodeGen); |
8095 | assert(Fn && Addr && "Target device function emission failed." ); |
8096 | } |
8097 | |
8098 | /// Emit combined directive 'target parallel loop' as if its constituent |
8099 | /// constructs are 'target', 'parallel', and 'for'. |
8100 | void CodeGenFunction::EmitOMPTargetParallelGenericLoopDirective( |
8101 | const OMPTargetParallelGenericLoopDirective &S) { |
8102 | auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8103 | emitTargetParallelGenericLoopRegion(CGF, S, Action); |
8104 | }; |
8105 | emitCommonOMPTargetDirective(*this, S, CodeGen); |
8106 | } |
8107 | |
8108 | void CodeGenFunction::EmitSimpleOMPExecutableDirective( |
8109 | const OMPExecutableDirective &D) { |
8110 | if (const auto *SD = dyn_cast<OMPScanDirective>(Val: &D)) { |
8111 | EmitOMPScanDirective(S: *SD); |
8112 | return; |
8113 | } |
8114 | if (!D.hasAssociatedStmt() || !D.getAssociatedStmt()) |
8115 | return; |
8116 | auto &&CodeGen = [&D](CodeGenFunction &CGF, PrePostActionTy &Action) { |
8117 | OMPPrivateScope GlobalsScope(CGF); |
8118 | if (isOpenMPTaskingDirective(D.getDirectiveKind())) { |
8119 | // Capture global firstprivates to avoid crash. |
8120 | for (const auto *C : D.getClausesOfKind<OMPFirstprivateClause>()) { |
8121 | for (const Expr *Ref : C->varlists()) { |
8122 | const auto *DRE = cast<DeclRefExpr>(Ref->IgnoreParenImpCasts()); |
8123 | if (!DRE) |
8124 | continue; |
8125 | const auto *VD = dyn_cast<VarDecl>(DRE->getDecl()); |
8126 | if (!VD || VD->hasLocalStorage()) |
8127 | continue; |
8128 | if (!CGF.LocalDeclMap.count(VD)) { |
8129 | LValue GlobLVal = CGF.EmitLValue(Ref); |
8130 | GlobalsScope.addPrivate(VD, GlobLVal.getAddress(CGF)); |
8131 | } |
8132 | } |
8133 | } |
8134 | } |
8135 | if (isOpenMPSimdDirective(D.getDirectiveKind())) { |
8136 | (void)GlobalsScope.Privatize(); |
8137 | ParentLoopDirectiveForScanRegion ScanRegion(CGF, D); |
8138 | emitOMPSimdRegion(CGF, S: cast<OMPLoopDirective>(Val: D), Action); |
8139 | } else { |
8140 | if (const auto *LD = dyn_cast<OMPLoopDirective>(Val: &D)) { |
8141 | for (const Expr *E : LD->counters()) { |
8142 | const auto *VD = cast<VarDecl>(Val: cast<DeclRefExpr>(Val: E)->getDecl()); |
8143 | if (!VD->hasLocalStorage() && !CGF.LocalDeclMap.count(VD)) { |
8144 | LValue GlobLVal = CGF.EmitLValue(E); |
8145 | GlobalsScope.addPrivate(LocalVD: VD, Addr: GlobLVal.getAddress(CGF)); |
8146 | } |
8147 | if (isa<OMPCapturedExprDecl>(Val: VD)) { |
8148 | // Emit only those that were not explicitly referenced in clauses. |
8149 | if (!CGF.LocalDeclMap.count(VD)) |
8150 | CGF.EmitVarDecl(D: *VD); |
8151 | } |
8152 | } |
8153 | for (const auto *C : D.getClausesOfKind<OMPOrderedClause>()) { |
8154 | if (!C->getNumForLoops()) |
8155 | continue; |
8156 | for (unsigned I = LD->getLoopsNumber(), |
8157 | E = C->getLoopNumIterations().size(); |
8158 | I < E; ++I) { |
8159 | if (const auto *VD = dyn_cast<OMPCapturedExprDecl>( |
8160 | cast<DeclRefExpr>(C->getLoopCounter(I))->getDecl())) { |
8161 | // Emit only those that were not explicitly referenced in clauses. |
8162 | if (!CGF.LocalDeclMap.count(Val: VD)) |
8163 | CGF.EmitVarDecl(D: *VD); |
8164 | } |
8165 | } |
8166 | } |
8167 | } |
8168 | (void)GlobalsScope.Privatize(); |
8169 | CGF.EmitStmt(S: D.getInnermostCapturedStmt()->getCapturedStmt()); |
8170 | } |
8171 | }; |
8172 | if (D.getDirectiveKind() == OMPD_atomic || |
8173 | D.getDirectiveKind() == OMPD_critical || |
8174 | D.getDirectiveKind() == OMPD_section || |
8175 | D.getDirectiveKind() == OMPD_master || |
8176 | D.getDirectiveKind() == OMPD_masked || |
8177 | D.getDirectiveKind() == OMPD_unroll) { |
8178 | EmitStmt(S: D.getAssociatedStmt()); |
8179 | } else { |
8180 | auto LPCRegion = |
8181 | CGOpenMPRuntime::LastprivateConditionalRAII::disable(CGF&: *this, S: D); |
8182 | OMPSimdLexicalScope Scope(*this, D); |
8183 | CGM.getOpenMPRuntime().emitInlinedDirective( |
8184 | *this, |
8185 | isOpenMPSimdDirective(D.getDirectiveKind()) ? OMPD_simd |
8186 | : D.getDirectiveKind(), |
8187 | CodeGen); |
8188 | } |
8189 | // Check for outer lastprivate conditional update. |
8190 | checkForLastprivateConditionalUpdate(CGF&: *this, S: D); |
8191 | } |
8192 | |