1 | //===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// \file |
9 | /// This file implements semantic analysis for CUDA constructs. |
10 | /// |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "clang/Sema/SemaCUDA.h" |
14 | #include "clang/AST/ASTContext.h" |
15 | #include "clang/AST/Decl.h" |
16 | #include "clang/AST/ExprCXX.h" |
17 | #include "clang/Basic/Cuda.h" |
18 | #include "clang/Basic/TargetInfo.h" |
19 | #include "clang/Lex/Preprocessor.h" |
20 | #include "clang/Sema/Lookup.h" |
21 | #include "clang/Sema/ScopeInfo.h" |
22 | #include "clang/Sema/Sema.h" |
23 | #include "clang/Sema/SemaDiagnostic.h" |
24 | #include "clang/Sema/SemaInternal.h" |
25 | #include "clang/Sema/Template.h" |
26 | #include "llvm/ADT/STLForwardCompat.h" |
27 | #include "llvm/ADT/SmallVector.h" |
28 | #include <optional> |
29 | using namespace clang; |
30 | |
31 | SemaCUDA::SemaCUDA(Sema &S) : SemaBase(S) {} |
32 | |
33 | template <typename AttrT> static bool hasExplicitAttr(const VarDecl *D) { |
34 | if (!D) |
35 | return false; |
36 | if (auto *A = D->getAttr<AttrT>()) |
37 | return !A->isImplicit(); |
38 | return false; |
39 | } |
40 | |
41 | void SemaCUDA::PushForceHostDevice() { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
43 | ForceHostDeviceDepth++; |
44 | } |
45 | |
46 | bool SemaCUDA::PopForceHostDevice() { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
48 | if (ForceHostDeviceDepth == 0) |
49 | return false; |
50 | ForceHostDeviceDepth--; |
51 | return true; |
52 | } |
53 | |
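// Builds the call to the execution-configuration function for a kernel launch.
// Illustrative sketch of the construct this handles (hypothetical names; the
// exact configuration function depends on the SDK, see getConfigureFuncName()):
//
//   __global__ void kernel(int *p);
//   void host_fn(int *p) {
//     kernel<<<dim3(8), dim3(128)>>>(p); // the <<<...>>> arguments become a
//                                        // call to e.g. cudaConfigureCall or
//                                        // __cudaPushCallConfiguration
//   }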
54 | ExprResult SemaCUDA::ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc, |
55 | MultiExprArg ExecConfig, |
56 | SourceLocation GGGLoc) { |
57 | FunctionDecl *ConfigDecl = getASTContext().getcudaConfigureCallDecl(); |
58 | if (!ConfigDecl) |
59 | return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use) |
60 | << getConfigureFuncName()); |
61 | QualType ConfigQTy = ConfigDecl->getType(); |
62 | |
63 | DeclRefExpr *ConfigDR = new (getASTContext()) DeclRefExpr( |
64 | getASTContext(), ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc); |
  SemaRef.MarkFunctionReferenced(LLLLoc, ConfigDecl);
66 | |
67 | return SemaRef.BuildCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr, |
68 | /*IsExecConfig=*/true); |
69 | } |
70 | |
71 | CUDAFunctionTarget SemaCUDA::IdentifyTarget(const ParsedAttributesView &Attrs) { |
72 | bool HasHostAttr = false; |
73 | bool HasDeviceAttr = false; |
74 | bool HasGlobalAttr = false; |
75 | bool HasInvalidTargetAttr = false; |
76 | for (const ParsedAttr &AL : Attrs) { |
77 | switch (AL.getKind()) { |
78 | case ParsedAttr::AT_CUDAGlobal: |
79 | HasGlobalAttr = true; |
80 | break; |
81 | case ParsedAttr::AT_CUDAHost: |
82 | HasHostAttr = true; |
83 | break; |
84 | case ParsedAttr::AT_CUDADevice: |
85 | HasDeviceAttr = true; |
86 | break; |
87 | case ParsedAttr::AT_CUDAInvalidTarget: |
88 | HasInvalidTargetAttr = true; |
89 | break; |
90 | default: |
91 | break; |
92 | } |
93 | } |
94 | |
95 | if (HasInvalidTargetAttr) |
96 | return CUDAFunctionTarget::InvalidTarget; |
97 | |
98 | if (HasGlobalAttr) |
99 | return CUDAFunctionTarget::Global; |
100 | |
101 | if (HasHostAttr && HasDeviceAttr) |
102 | return CUDAFunctionTarget::HostDevice; |
103 | |
104 | if (HasDeviceAttr) |
105 | return CUDAFunctionTarget::Device; |
106 | |
107 | return CUDAFunctionTarget::Host; |
108 | } |
109 | |
110 | template <typename A> |
111 | static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) { |
112 | return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) { |
113 | return isa<A>(Attribute) && |
114 | !(IgnoreImplicitAttr && Attribute->isImplicit()); |
115 | }); |
116 | } |
117 | |
SemaCUDA::CUDATargetContextRAII::CUDATargetContextRAII(
    SemaCUDA &S_, SemaCUDA::CUDATargetContextKind K, Decl *D)
120 | : S(S_) { |
121 | SavedCtx = S.CurCUDATargetCtx; |
122 | assert(K == SemaCUDA::CTCK_InitGlobalVar); |
  auto *VD = dyn_cast_or_null<VarDecl>(D);
124 | if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) { |
125 | auto Target = CUDAFunctionTarget::Host; |
126 | if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) && |
127 | !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) || |
128 | hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) || |
129 | hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true)) |
130 | Target = CUDAFunctionTarget::Device; |
131 | S.CurCUDATargetCtx = {Target, K, VD}; |
132 | } |
133 | } |
134 | |
135 | /// IdentifyTarget - Determine the CUDA compilation target for this function |
136 | CUDAFunctionTarget SemaCUDA::IdentifyTarget(const FunctionDecl *D, |
137 | bool IgnoreImplicitHDAttr) { |
138 | // Code that lives outside a function gets the target from CurCUDATargetCtx. |
139 | if (D == nullptr) |
140 | return CurCUDATargetCtx.Target; |
141 | |
142 | if (D->hasAttr<CUDAInvalidTargetAttr>()) |
143 | return CUDAFunctionTarget::InvalidTarget; |
144 | |
145 | if (D->hasAttr<CUDAGlobalAttr>()) |
146 | return CUDAFunctionTarget::Global; |
147 | |
148 | if (hasAttr<CUDADeviceAttr>(D, IgnoreImplicitHDAttr)) { |
149 | if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) |
150 | return CUDAFunctionTarget::HostDevice; |
151 | return CUDAFunctionTarget::Device; |
152 | } else if (hasAttr<CUDAHostAttr>(D, IgnoreImplicitHDAttr)) { |
153 | return CUDAFunctionTarget::Host; |
154 | } else if ((D->isImplicit() || !D->isUserProvided()) && |
155 | !IgnoreImplicitHDAttr) { |
156 | // Some implicit declarations (like intrinsic functions) are not marked. |
157 | // Set the most lenient target on them for maximal flexibility. |
158 | return CUDAFunctionTarget::HostDevice; |
159 | } |
160 | |
161 | return CUDAFunctionTarget::Host; |
162 | } |
163 | |
164 | /// IdentifyTarget - Determine the CUDA compilation target for this variable. |
165 | SemaCUDA::CUDAVariableTarget SemaCUDA::IdentifyTarget(const VarDecl *Var) { |
166 | if (Var->hasAttr<HIPManagedAttr>()) |
167 | return CVT_Unified; |
  // Only constexpr and const variables with implicit constant attribute
  // are emitted on both sides. Such variables are promoted to device side
  // only if they have static constant initializers on device side.
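  // Illustrative example (hypothetical variables): a namespace-scope
  //   constexpr int N = 16;
  // that has been given an implicit __constant__ attribute (see
  // MaybeAddConstantAttr) is emitted on both sides (CVT_Both), whereas an
  // explicitly annotated
  //   __constant__ int M = 16;
  // is device-only (CVT_Device).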
171 | if ((Var->isConstexpr() || Var->getType().isConstQualified()) && |
172 | Var->hasAttr<CUDAConstantAttr>() && |
173 | !hasExplicitAttr<CUDAConstantAttr>(Var)) |
174 | return CVT_Both; |
175 | if (Var->hasAttr<CUDADeviceAttr>() || Var->hasAttr<CUDAConstantAttr>() || |
176 | Var->hasAttr<CUDASharedAttr>() || |
177 | Var->getType()->isCUDADeviceBuiltinSurfaceType() || |
178 | Var->getType()->isCUDADeviceBuiltinTextureType()) |
179 | return CVT_Device; |
  // Function-scope static variables without an explicit device or constant
  // attribute are emitted
182 | // - on both sides in host device functions |
183 | // - on device side in device or global functions |
184 | if (auto *FD = dyn_cast<FunctionDecl>(Var->getDeclContext())) { |
185 | switch (IdentifyTarget(FD)) { |
186 | case CUDAFunctionTarget::HostDevice: |
187 | return CVT_Both; |
188 | case CUDAFunctionTarget::Device: |
189 | case CUDAFunctionTarget::Global: |
190 | return CVT_Device; |
191 | default: |
192 | return CVT_Host; |
193 | } |
194 | } |
195 | return CVT_Host; |
196 | } |
197 | |
198 | // * CUDA Call preference table |
199 | // |
200 | // F - from, |
201 | // T - to |
202 | // Ph - preference in host mode |
203 | // Pd - preference in device mode |
204 | // H - handled in (x) |
205 | // Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never. |
206 | // |
207 | // | F | T | Ph | Pd | H | |
208 | // |----+----+-----+-----+-----+ |
209 | // | d | d | N | N | (c) | |
210 | // | d | g | -- | -- | (a) | |
211 | // | d | h | -- | -- | (e) | |
212 | // | d | hd | HD | HD | (b) | |
213 | // | g | d | N | N | (c) | |
214 | // | g | g | -- | -- | (a) | |
215 | // | g | h | -- | -- | (e) | |
216 | // | g | hd | HD | HD | (b) | |
217 | // | h | d | -- | -- | (e) | |
218 | // | h | g | N | N | (c) | |
219 | // | h | h | N | N | (c) | |
220 | // | h | hd | HD | HD | (b) | |
221 | // | hd | d | WS | SS | (d) | |
222 | // | hd | g | SS | -- |(d/a)| |
223 | // | hd | h | SS | WS | (d) | |
224 | // | hd | hd | HD | HD | (b) | |
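//
// Illustrative reading of the table (hypothetical functions): given
//
//   __device__ int d();
//   __host__ int h();
//   __host__ __device__ int hd();
//
// a call from hd() to d() is SameSide when compiling for the device and
// WrongSide when compiling for the host; a call from hd() to h() is the
// opposite. WrongSide calls are accepted here but rejected if they are ever
// codegened (see case (d) below).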
225 | |
226 | SemaCUDA::CUDAFunctionPreference |
227 | SemaCUDA::IdentifyPreference(const FunctionDecl *Caller, |
228 | const FunctionDecl *Callee) { |
  assert(Callee && "Callee must be valid.");
230 | |
231 | // Treat ctor/dtor as host device function in device var initializer to allow |
232 | // trivial ctor/dtor without device attr to be used. Non-trivial ctor/dtor |
233 | // will be diagnosed by checkAllowedInitializer. |
234 | if (Caller == nullptr && CurCUDATargetCtx.Kind == CTCK_InitGlobalVar && |
235 | CurCUDATargetCtx.Target == CUDAFunctionTarget::Device && |
      (isa<CXXConstructorDecl>(Callee) || isa<CXXDestructorDecl>(Callee)))
237 | return CFP_HostDevice; |
238 | |
  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyTarget(Callee);
241 | |
242 | // If one of the targets is invalid, the check always fails, no matter what |
243 | // the other target is. |
244 | if (CallerTarget == CUDAFunctionTarget::InvalidTarget || |
245 | CalleeTarget == CUDAFunctionTarget::InvalidTarget) |
246 | return CFP_Never; |
247 | |
248 | // (a) Can't call global from some contexts until we support CUDA's |
249 | // dynamic parallelism. |
250 | if (CalleeTarget == CUDAFunctionTarget::Global && |
251 | (CallerTarget == CUDAFunctionTarget::Global || |
252 | CallerTarget == CUDAFunctionTarget::Device)) |
253 | return CFP_Never; |
254 | |
255 | // (b) Calling HostDevice is OK for everyone. |
256 | if (CalleeTarget == CUDAFunctionTarget::HostDevice) |
257 | return CFP_HostDevice; |
258 | |
259 | // (c) Best case scenarios |
260 | if (CalleeTarget == CallerTarget || |
261 | (CallerTarget == CUDAFunctionTarget::Host && |
262 | CalleeTarget == CUDAFunctionTarget::Global) || |
263 | (CallerTarget == CUDAFunctionTarget::Global && |
264 | CalleeTarget == CUDAFunctionTarget::Device)) |
265 | return CFP_Native; |
266 | |
  // HipStdPar mode is special: whether a device-side call to a host target is
  // valid is deferred to a subsequent pass and cannot be unambiguously
  // adjudicated in the AST, hence we optimistically allow such calls here.
270 | if (getLangOpts().HIPStdPar && |
271 | (CallerTarget == CUDAFunctionTarget::Global || |
272 | CallerTarget == CUDAFunctionTarget::Device || |
273 | CallerTarget == CUDAFunctionTarget::HostDevice) && |
274 | CalleeTarget == CUDAFunctionTarget::Host) |
275 | return CFP_HostDevice; |
276 | |
277 | // (d) HostDevice behavior depends on compilation mode. |
278 | if (CallerTarget == CUDAFunctionTarget::HostDevice) { |
279 | // It's OK to call a compilation-mode matching function from an HD one. |
280 | if ((getLangOpts().CUDAIsDevice && |
281 | CalleeTarget == CUDAFunctionTarget::Device) || |
282 | (!getLangOpts().CUDAIsDevice && |
283 | (CalleeTarget == CUDAFunctionTarget::Host || |
284 | CalleeTarget == CUDAFunctionTarget::Global))) |
285 | return CFP_SameSide; |
286 | |
287 | // Calls from HD to non-mode-matching functions (i.e., to host functions |
288 | // when compiling in device mode or to device functions when compiling in |
289 | // host mode) are allowed at the sema level, but eventually rejected if |
290 | // they're ever codegened. TODO: Reject said calls earlier. |
291 | return CFP_WrongSide; |
292 | } |
293 | |
294 | // (e) Calling across device/host boundary is not something you should do. |
295 | if ((CallerTarget == CUDAFunctionTarget::Host && |
296 | CalleeTarget == CUDAFunctionTarget::Device) || |
297 | (CallerTarget == CUDAFunctionTarget::Device && |
298 | CalleeTarget == CUDAFunctionTarget::Host) || |
299 | (CallerTarget == CUDAFunctionTarget::Global && |
300 | CalleeTarget == CUDAFunctionTarget::Host)) |
301 | return CFP_Never; |
302 | |
  llvm_unreachable("All cases should've been handled by now.");
304 | } |
305 | |
306 | template <typename AttrT> static bool hasImplicitAttr(const FunctionDecl *D) { |
307 | if (!D) |
308 | return false; |
309 | if (auto *A = D->getAttr<AttrT>()) |
310 | return A->isImplicit(); |
311 | return D->isImplicit(); |
312 | } |
313 | |
314 | bool SemaCUDA::isImplicitHostDeviceFunction(const FunctionDecl *D) { |
315 | bool IsImplicitDevAttr = hasImplicitAttr<CUDADeviceAttr>(D); |
316 | bool IsImplicitHostAttr = hasImplicitAttr<CUDAHostAttr>(D); |
317 | return IsImplicitDevAttr && IsImplicitHostAttr; |
318 | } |
319 | |
320 | void SemaCUDA::EraseUnwantedMatches( |
321 | const FunctionDecl *Caller, |
322 | SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) { |
323 | if (Matches.size() <= 1) |
324 | return; |
325 | |
326 | using Pair = std::pair<DeclAccessPair, FunctionDecl*>; |
327 | |
328 | // Gets the CUDA function preference for a call from Caller to Match. |
329 | auto GetCFP = [&](const Pair &Match) { |
    return IdentifyPreference(Caller, Match.second);
331 | }; |
332 | |
333 | // Find the best call preference among the functions in Matches. |
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));
337 | |
338 | // Erase all functions with lower priority. |
  llvm::erase_if(Matches,
                 [&](const Pair &Match) { return GetCFP(Match) < BestCFP; });
341 | } |
342 | |
343 | /// When an implicitly-declared special member has to invoke more than one |
344 | /// base/field special member, conflicts may occur in the targets of these |
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
347 | /// This function figures out if the given targets \param Target1 and |
348 | /// \param Target2 conflict, and if they do not it fills in |
349 | /// \param ResolvedTarget with a target that resolves for both calls. |
350 | /// \return true if there's a conflict, false otherwise. |
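/// Illustrative example (hypothetical types): given
///   struct A { __host__ A(const A &); };
///   struct B { __device__ B(const B &); };
///   struct C : A, B {};
/// the implicit copy constructor of C would need to call a __host__-only and a
/// __device__-only constructor, so the targets conflict and the caller marks
/// C's member with CUDAInvalidTargetAttr.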
351 | static bool |
352 | resolveCalleeCUDATargetConflict(CUDAFunctionTarget Target1, |
353 | CUDAFunctionTarget Target2, |
354 | CUDAFunctionTarget *ResolvedTarget) { |
355 | // Only free functions and static member functions may be global. |
356 | assert(Target1 != CUDAFunctionTarget::Global); |
357 | assert(Target2 != CUDAFunctionTarget::Global); |
358 | |
359 | if (Target1 == CUDAFunctionTarget::HostDevice) { |
360 | *ResolvedTarget = Target2; |
361 | } else if (Target2 == CUDAFunctionTarget::HostDevice) { |
362 | *ResolvedTarget = Target1; |
363 | } else if (Target1 != Target2) { |
364 | return true; |
365 | } else { |
366 | *ResolvedTarget = Target1; |
367 | } |
368 | |
369 | return false; |
370 | } |
371 | |
372 | bool SemaCUDA::inferTargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl, |
373 | CXXSpecialMemberKind CSM, |
374 | CXXMethodDecl *MemberDecl, |
375 | bool ConstRHS, |
376 | bool Diagnose) { |
377 | // If the defaulted special member is defined lexically outside of its |
378 | // owning class, or the special member already has explicit device or host |
379 | // attributes, do not infer. |
380 | bool InClass = MemberDecl->getLexicalParent() == MemberDecl->getParent(); |
381 | bool HasH = MemberDecl->hasAttr<CUDAHostAttr>(); |
382 | bool HasD = MemberDecl->hasAttr<CUDADeviceAttr>(); |
383 | bool HasExplicitAttr = |
384 | (HasD && !MemberDecl->getAttr<CUDADeviceAttr>()->isImplicit()) || |
385 | (HasH && !MemberDecl->getAttr<CUDAHostAttr>()->isImplicit()); |
386 | if (!InClass || HasExplicitAttr) |
387 | return false; |
388 | |
389 | std::optional<CUDAFunctionTarget> InferredTarget; |
390 | |
391 | // We're going to invoke special member lookup; mark that these special |
392 | // members are called from this one, and not from its caller. |
393 | Sema::ContextRAII MethodContext(SemaRef, MemberDecl); |
394 | |
395 | // Look for special members in base classes that should be invoked from here. |
  // Infer the target of this member based on the ones it should call.
397 | // Skip direct and indirect virtual bases for abstract classes. |
398 | llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases; |
399 | for (const auto &B : ClassDecl->bases()) { |
400 | if (!B.isVirtual()) { |
      Bases.push_back(&B);
402 | } |
403 | } |
404 | |
405 | if (!ClassDecl->isAbstract()) { |
    llvm::append_range(Bases, llvm::make_pointer_range(ClassDecl->vbases()));
407 | } |
408 | |
409 | for (const auto *B : Bases) { |
410 | const RecordType *BaseType = B->getType()->getAs<RecordType>(); |
411 | if (!BaseType) { |
412 | continue; |
413 | } |
414 | |
    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
416 | Sema::SpecialMemberOverloadResult SMOR = |
        SemaRef.LookupSpecialMember(BaseClassDecl, CSM,
418 | /* ConstArg */ ConstRHS, |
419 | /* VolatileArg */ false, |
420 | /* RValueThis */ false, |
421 | /* ConstThis */ false, |
422 | /* VolatileThis */ false); |
423 | |
424 | if (!SMOR.getMethod()) |
425 | continue; |
426 | |
427 | CUDAFunctionTarget BaseMethodTarget = IdentifyTarget(SMOR.getMethod()); |
428 | if (!InferredTarget) { |
429 | InferredTarget = BaseMethodTarget; |
430 | } else { |
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          *InferredTarget, BaseMethodTarget, &*InferredTarget);
433 | if (ResolutionError) { |
434 | if (Diagnose) { |
435 | Diag(ClassDecl->getLocation(), |
436 | diag::note_implicit_member_target_infer_collision) |
437 | << (unsigned)CSM << llvm::to_underlying(*InferredTarget) |
438 | << llvm::to_underlying(BaseMethodTarget); |
439 | } |
440 | MemberDecl->addAttr( |
441 | CUDAInvalidTargetAttr::CreateImplicit(getASTContext())); |
442 | return true; |
443 | } |
444 | } |
445 | } |
446 | |
447 | // Same as for bases, but now for special members of fields. |
448 | for (const auto *F : ClassDecl->fields()) { |
449 | if (F->isInvalidDecl()) { |
450 | continue; |
451 | } |
452 | |
453 | const RecordType *FieldType = |
454 | getASTContext().getBaseElementType(F->getType())->getAs<RecordType>(); |
455 | if (!FieldType) { |
456 | continue; |
457 | } |
458 | |
459 | CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl()); |
460 | Sema::SpecialMemberOverloadResult SMOR = |
461 | SemaRef.LookupSpecialMember(FieldRecDecl, CSM, |
462 | /* ConstArg */ ConstRHS && !F->isMutable(), |
463 | /* VolatileArg */ false, |
464 | /* RValueThis */ false, |
465 | /* ConstThis */ false, |
466 | /* VolatileThis */ false); |
467 | |
468 | if (!SMOR.getMethod()) |
469 | continue; |
470 | |
471 | CUDAFunctionTarget FieldMethodTarget = IdentifyTarget(SMOR.getMethod()); |
472 | if (!InferredTarget) { |
473 | InferredTarget = FieldMethodTarget; |
474 | } else { |
475 | bool ResolutionError = resolveCalleeCUDATargetConflict( |
476 | *InferredTarget, FieldMethodTarget, &*InferredTarget); |
477 | if (ResolutionError) { |
478 | if (Diagnose) { |
479 | Diag(ClassDecl->getLocation(), |
480 | diag::note_implicit_member_target_infer_collision) |
481 | << (unsigned)CSM << llvm::to_underlying(*InferredTarget) |
482 | << llvm::to_underlying(FieldMethodTarget); |
483 | } |
484 | MemberDecl->addAttr( |
485 | CUDAInvalidTargetAttr::CreateImplicit(getASTContext())); |
486 | return true; |
487 | } |
488 | } |
489 | } |
490 | |
492 | // If no target was inferred, mark this member as __host__ __device__; |
493 | // it's the least restrictive option that can be invoked from any target. |
494 | bool NeedsH = true, NeedsD = true; |
495 | if (InferredTarget) { |
496 | if (*InferredTarget == CUDAFunctionTarget::Device) |
497 | NeedsH = false; |
498 | else if (*InferredTarget == CUDAFunctionTarget::Host) |
499 | NeedsD = false; |
500 | } |
501 | |
  // We are either setting the attributes for the first time, or the inferred
  // ones must match the previously set ones.
504 | if (NeedsD && !HasD) |
505 | MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
506 | if (NeedsH && !HasH) |
507 | MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
508 | |
509 | return false; |
510 | } |
511 | |
512 | bool SemaCUDA::isEmptyConstructor(SourceLocation Loc, CXXConstructorDecl *CD) { |
513 | if (!CD->isDefined() && CD->isTemplateInstantiation()) |
    SemaRef.InstantiateFunctionDefinition(Loc, CD->getFirstDecl());
515 | |
516 | // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered |
517 | // empty at a point in the translation unit, if it is either a |
518 | // trivial constructor |
519 | if (CD->isTrivial()) |
520 | return true; |
521 | |
522 | // ... or it satisfies all of the following conditions: |
523 | // The constructor function has been defined. |
524 | // The constructor function has no parameters, |
525 | // and the function body is an empty compound statement. |
526 | if (!(CD->hasTrivialBody() && CD->getNumParams() == 0)) |
527 | return false; |
528 | |
529 | // Its class has no virtual functions and no virtual base classes. |
530 | if (CD->getParent()->isDynamicClass()) |
531 | return false; |
532 | |
533 | // Union ctor does not call ctors of its data members. |
534 | if (CD->getParent()->isUnion()) |
535 | return true; |
536 | |
537 | // The only form of initializer allowed is an empty constructor. |
538 | // This will recursively check all base classes and member initializers |
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyConstructor(Loc, CE->getConstructor());
543 | return false; |
544 | })) |
545 | return false; |
546 | |
547 | return true; |
548 | } |
549 | |
550 | bool SemaCUDA::isEmptyDestructor(SourceLocation Loc, CXXDestructorDecl *DD) { |
551 | // No destructor -> no problem. |
552 | if (!DD) |
553 | return true; |
554 | |
555 | if (!DD->isDefined() && DD->isTemplateInstantiation()) |
    SemaRef.InstantiateFunctionDefinition(Loc, DD->getFirstDecl());
557 | |
558 | // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered |
559 | // empty at a point in the translation unit, if it is either a |
  // trivial destructor
561 | if (DD->isTrivial()) |
562 | return true; |
563 | |
564 | // ... or it satisfies all of the following conditions: |
565 | // The destructor function has been defined. |
566 | // and the function body is an empty compound statement. |
567 | if (!DD->hasTrivialBody()) |
568 | return false; |
569 | |
570 | const CXXRecordDecl *ClassDecl = DD->getParent(); |
571 | |
572 | // Its class has no virtual functions and no virtual base classes. |
573 | if (ClassDecl->isDynamicClass()) |
574 | return false; |
575 | |
576 | // Union does not have base class and union dtor does not call dtors of its |
577 | // data members. |
578 | if (DD->getParent()->isUnion()) |
579 | return true; |
580 | |
581 | // Only empty destructors are allowed. This will recursively check |
582 | // destructors for all base classes... |
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyDestructor(Loc, RD->getDestructor());
586 | return true; |
587 | })) |
588 | return false; |
589 | |
590 | // ... and member fields. |
591 | if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) { |
592 | if (CXXRecordDecl *RD = Field->getType() |
593 | ->getBaseElementTypeUnsafe() |
594 | ->getAsCXXRecordDecl()) |
595 | return isEmptyDestructor(Loc, RD->getDestructor()); |
596 | return true; |
597 | })) |
598 | return false; |
599 | |
600 | return true; |
601 | } |
602 | |
603 | namespace { |
604 | enum CUDAInitializerCheckKind { |
605 | CICK_DeviceOrConstant, // Check initializer for device/constant variable |
606 | CICK_Shared, // Check initializer for shared variable |
607 | }; |
608 | |
609 | bool IsDependentVar(VarDecl *VD) { |
610 | if (VD->getType()->isDependentType()) |
611 | return true; |
612 | if (const auto *Init = VD->getInit()) |
613 | return Init->isValueDependent(); |
614 | return false; |
615 | } |
616 | |
617 | // Check whether a variable has an allowed initializer for a CUDA device side |
618 | // variable with global storage. \p VD may be a host variable to be checked for |
619 | // potential promotion to device side variable. |
620 | // |
621 | // CUDA/HIP allows only empty constructors as initializers for global |
622 | // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all |
623 | // __shared__ variables whether they are local or not (they all are implicitly |
624 | // static in CUDA). One exception is that CUDA allows constant initializers |
625 | // for __constant__ and __device__ variables. |
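//
// Illustrative examples (hypothetical declarations):
//   __device__ int A;             // OK: no initializer
//   __device__ int B = 42;        // OK: constant initializer
//   __device__ int C = host_fn(); // rejected (unless -fgpu-allow-device-init):
//                                 // dynamic initialization
//   __shared__ int D;             // OK: no initializer
//   __shared__ int E = 0;         // rejected: __shared__ variables may not be
//                                 // initialized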
626 | bool HasAllowedCUDADeviceStaticInitializer(SemaCUDA &S, VarDecl *VD, |
627 | CUDAInitializerCheckKind CheckKind) { |
628 | assert(!VD->isInvalidDecl() && VD->hasGlobalStorage()); |
  assert(!IsDependentVar(VD) && "do not check dependent var");
630 | const Expr *Init = VD->getInit(); |
631 | auto IsEmptyInit = [&](const Expr *Init) { |
632 | if (!Init) |
633 | return true; |
    if (const auto *CE = dyn_cast<CXXConstructExpr>(Init)) {
635 | return S.isEmptyConstructor(VD->getLocation(), CE->getConstructor()); |
636 | } |
637 | return false; |
638 | }; |
639 | auto IsConstantInit = [&](const Expr *Init) { |
640 | assert(Init); |
641 | ASTContext::CUDAConstantEvalContextRAII EvalCtx(S.getASTContext(), |
642 | /*NoWronSidedVars=*/true); |
643 | return Init->isConstantInitializer(S.getASTContext(), |
644 | VD->getType()->isReferenceType()); |
645 | }; |
646 | auto HasEmptyDtor = [&](VarDecl *VD) { |
647 | if (const auto *RD = VD->getType()->getAsCXXRecordDecl()) |
648 | return S.isEmptyDestructor(VD->getLocation(), RD->getDestructor()); |
649 | return true; |
650 | }; |
651 | if (CheckKind == CICK_Shared) |
652 | return IsEmptyInit(Init) && HasEmptyDtor(VD); |
653 | return S.getLangOpts().GPUAllowDeviceInit || |
654 | ((IsEmptyInit(Init) || IsConstantInit(Init)) && HasEmptyDtor(VD)); |
655 | } |
656 | } // namespace |
657 | |
658 | void SemaCUDA::checkAllowedInitializer(VarDecl *VD) { |
659 | // Return early if VD is inside a non-instantiated template function since |
660 | // the implicit constructor is not defined yet. |
661 | if (const FunctionDecl *FD = |
662 | dyn_cast_or_null<FunctionDecl>(VD->getDeclContext())) |
663 | if (FD->isDependentContext()) |
664 | return; |
665 | |
666 | // Do not check dependent variables since the ctor/dtor/initializer are not |
667 | // determined. Do it after instantiation. |
668 | if (VD->isInvalidDecl() || !VD->hasInit() || !VD->hasGlobalStorage() || |
669 | IsDependentVar(VD)) |
670 | return; |
671 | const Expr *Init = VD->getInit(); |
672 | bool IsSharedVar = VD->hasAttr<CUDASharedAttr>(); |
673 | bool IsDeviceOrConstantVar = |
674 | !IsSharedVar && |
675 | (VD->hasAttr<CUDADeviceAttr>() || VD->hasAttr<CUDAConstantAttr>()); |
676 | if (IsDeviceOrConstantVar || IsSharedVar) { |
    if (HasAllowedCUDADeviceStaticInitializer(
            *this, VD, IsSharedVar ? CICK_Shared : CICK_DeviceOrConstant))
679 | return; |
680 | Diag(VD->getLocation(), |
681 | IsSharedVar ? diag::err_shared_var_init : diag::err_dynamic_var_init) |
682 | << Init->getSourceRange(); |
683 | VD->setInvalidDecl(); |
684 | } else { |
685 | // This is a host-side global variable. Check that the initializer is |
686 | // callable from the host side. |
687 | const FunctionDecl *InitFn = nullptr; |
    if (const CXXConstructExpr *CE = dyn_cast<CXXConstructExpr>(Init)) {
      InitFn = CE->getConstructor();
    } else if (const CallExpr *CE = dyn_cast<CallExpr>(Init)) {
691 | InitFn = CE->getDirectCallee(); |
692 | } |
693 | if (InitFn) { |
      CUDAFunctionTarget InitFnTarget = IdentifyTarget(InitFn);
695 | if (InitFnTarget != CUDAFunctionTarget::Host && |
696 | InitFnTarget != CUDAFunctionTarget::HostDevice) { |
697 | Diag(VD->getLocation(), diag::err_ref_bad_target_global_initializer) |
698 | << llvm::to_underlying(InitFnTarget) << InitFn; |
699 | Diag(InitFn->getLocation(), diag::note_previous_decl) << InitFn; |
700 | VD->setInvalidDecl(); |
701 | } |
702 | } |
703 | } |
704 | } |
705 | |
706 | void SemaCUDA::RecordImplicitHostDeviceFuncUsedByDevice( |
707 | const FunctionDecl *Callee) { |
708 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
709 | if (!Caller) |
710 | return; |
711 | |
  if (!isImplicitHostDeviceFunction(Callee))
713 | return; |
714 | |
  CUDAFunctionTarget CallerTarget = IdentifyTarget(Caller);
716 | |
717 | // Record whether an implicit host device function is used on device side. |
718 | if (CallerTarget != CUDAFunctionTarget::Device && |
719 | CallerTarget != CUDAFunctionTarget::Global && |
720 | (CallerTarget != CUDAFunctionTarget::HostDevice || |
       (isImplicitHostDeviceFunction(Caller) &&
        !getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.count(Caller))))
723 | return; |
724 | |
  getASTContext().CUDAImplicitHostDeviceFunUsedByDevice.insert(Callee);
726 | } |
727 | |
728 | // With -fcuda-host-device-constexpr, an unattributed constexpr function is |
729 | // treated as implicitly __host__ __device__, unless: |
730 | // * it is a variadic function (device-side variadic functions are not |
731 | // allowed), or |
// * a __device__ function with this signature was already declared, in which
//   case we output an error, unless the __device__ decl is in a system header,
//   in which case we leave the constexpr function unattributed.
735 | // |
736 | // In addition, all function decls are treated as __host__ __device__ when |
737 | // ForceHostDeviceDepth > 0 (corresponding to code within a |
738 | // #pragma clang force_cuda_host_device_begin/end |
739 | // pair). |
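//
// Illustrative example (hypothetical functions), assuming
// -fcuda-host-device-constexpr is in effect:
//
//   constexpr int square(int x) { return x * x; } // treated as implicitly
//                                                 // __host__ __device__
//   __device__ int square(int); // if such a declaration already exists and is
//                               // not in a system header, an error is emitted
//                               // instead of adding the attributes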
740 | void SemaCUDA::maybeAddHostDeviceAttrs(FunctionDecl *NewD, |
741 | const LookupResult &Previous) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
743 | |
744 | if (ForceHostDeviceDepth > 0) { |
745 | if (!NewD->hasAttr<CUDAHostAttr>()) |
746 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
747 | if (!NewD->hasAttr<CUDADeviceAttr>()) |
748 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
749 | return; |
750 | } |
751 | |
752 | // If a template function has no host/device/global attributes, |
753 | // make it implicitly host device function. |
754 | if (getLangOpts().OffloadImplicitHostDeviceTemplates && |
755 | !NewD->hasAttr<CUDAHostAttr>() && !NewD->hasAttr<CUDADeviceAttr>() && |
756 | !NewD->hasAttr<CUDAGlobalAttr>() && |
757 | (NewD->getDescribedFunctionTemplate() || |
758 | NewD->isFunctionTemplateSpecialization())) { |
759 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
760 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
761 | return; |
762 | } |
763 | |
764 | if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() || |
765 | NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() || |
766 | NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>()) |
767 | return; |
768 | |
769 | // Is D a __device__ function with the same signature as NewD, ignoring CUDA |
770 | // attributes? |
771 | auto IsMatchingDeviceFn = [&](NamedDecl *D) { |
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
773 | D = Using->getTargetDecl(); |
774 | FunctionDecl *OldD = D->getAsFunction(); |
775 | return OldD && OldD->hasAttr<CUDADeviceAttr>() && |
776 | !OldD->hasAttr<CUDAHostAttr>() && |
777 | !SemaRef.IsOverload(NewD, OldD, |
778 | /* UseMemberUsingDeclRules = */ false, |
779 | /* ConsiderCudaAttrs = */ false); |
780 | }; |
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
782 | if (It != Previous.end()) { |
783 | // We found a __device__ function with the same name and signature as NewD |
784 | // (ignoring CUDA attrs). This is an error unless that function is defined |
785 | // in a system header, in which case we simply return without making NewD |
786 | // host+device. |
787 | NamedDecl *Match = *It; |
    if (!SemaRef.getSourceManager().isInSystemHeader(Match->getLocation())) {
789 | Diag(NewD->getLocation(), |
790 | diag::err_cuda_unattributed_constexpr_cannot_overload_device) |
791 | << NewD; |
792 | Diag(Match->getLocation(), |
793 | diag::note_cuda_conflicting_device_function_declared_here); |
794 | } |
795 | return; |
796 | } |
797 | |
798 | NewD->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
799 | NewD->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
800 | } |
801 | |
802 | // TODO: `__constant__` memory may be a limited resource for certain targets. |
803 | // A safeguard may be needed at the end of compilation pipeline if |
804 | // `__constant__` memory usage goes beyond limit. |
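//
// Illustrative example (hypothetical variable): when compiling for the device,
// a namespace-scope
//   constexpr float Pi = 3.14159f;
// is given an implicit __constant__ attribute here, making it directly usable
// from device code.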
805 | void SemaCUDA::MaybeAddConstantAttr(VarDecl *VD) { |
  // Do not promote dependent variables since the ctor/dtor/initializer are
  // not determined. Do it after instantiation.
808 | if (getLangOpts().CUDAIsDevice && !VD->hasAttr<CUDAConstantAttr>() && |
809 | !VD->hasAttr<CUDASharedAttr>() && |
810 | (VD->isFileVarDecl() || VD->isStaticDataMember()) && |
811 | !IsDependentVar(VD) && |
812 | ((VD->isConstexpr() || VD->getType().isConstQualified()) && |
813 | HasAllowedCUDADeviceStaticInitializer(*this, VD, |
814 | CICK_DeviceOrConstant))) { |
815 | VD->addAttr(CUDAConstantAttr::CreateImplicit(getASTContext())); |
816 | } |
817 | } |
818 | |
819 | SemaBase::SemaDiagnosticBuilder SemaCUDA::DiagIfDeviceCode(SourceLocation Loc, |
820 | unsigned DiagID) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
822 | FunctionDecl *CurFunContext = |
823 | SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
824 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
825 | if (!CurFunContext) |
826 | return SemaDiagnosticBuilder::K_Nop; |
827 | switch (CurrentTarget()) { |
828 | case CUDAFunctionTarget::Global: |
829 | case CUDAFunctionTarget::Device: |
830 | return SemaDiagnosticBuilder::K_Immediate; |
831 | case CUDAFunctionTarget::HostDevice: |
832 | // An HD function counts as host code if we're compiling for host, and |
833 | // device code if we're compiling for device. Defer any errors in device |
834 | // mode until the function is known-emitted. |
835 | if (!getLangOpts().CUDAIsDevice) |
836 | return SemaDiagnosticBuilder::K_Nop; |
837 | if (SemaRef.IsLastErrorImmediate && |
838 | getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID)) |
839 | return SemaDiagnosticBuilder::K_Immediate; |
    return (SemaRef.getEmissionStatus(CurFunContext) ==
841 | Sema::FunctionEmissionStatus::Emitted) |
842 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
843 | : SemaDiagnosticBuilder::K_Deferred; |
844 | default: |
845 | return SemaDiagnosticBuilder::K_Nop; |
846 | } |
847 | }(); |
848 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef); |
849 | } |
850 | |
851 | Sema::SemaDiagnosticBuilder SemaCUDA::DiagIfHostCode(SourceLocation Loc, |
852 | unsigned DiagID) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
854 | FunctionDecl *CurFunContext = |
855 | SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
856 | SemaDiagnosticBuilder::Kind DiagKind = [&] { |
857 | if (!CurFunContext) |
858 | return SemaDiagnosticBuilder::K_Nop; |
859 | switch (CurrentTarget()) { |
860 | case CUDAFunctionTarget::Host: |
861 | return SemaDiagnosticBuilder::K_Immediate; |
862 | case CUDAFunctionTarget::HostDevice: |
863 | // An HD function counts as host code if we're compiling for host, and |
864 | // device code if we're compiling for device. Defer any errors in device |
865 | // mode until the function is known-emitted. |
866 | if (getLangOpts().CUDAIsDevice) |
867 | return SemaDiagnosticBuilder::K_Nop; |
868 | if (SemaRef.IsLastErrorImmediate && |
869 | getDiagnostics().getDiagnosticIDs()->isBuiltinNote(DiagID)) |
870 | return SemaDiagnosticBuilder::K_Immediate; |
    return (SemaRef.getEmissionStatus(CurFunContext) ==
872 | Sema::FunctionEmissionStatus::Emitted) |
873 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
874 | : SemaDiagnosticBuilder::K_Deferred; |
875 | default: |
876 | return SemaDiagnosticBuilder::K_Nop; |
877 | } |
878 | }(); |
879 | return SemaDiagnosticBuilder(DiagKind, Loc, DiagID, CurFunContext, SemaRef); |
880 | } |
881 | |
882 | bool SemaCUDA::CheckCall(SourceLocation Loc, FunctionDecl *Callee) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");
885 | |
886 | const auto &ExprEvalCtx = SemaRef.currentEvaluationContext(); |
887 | if (ExprEvalCtx.isUnevaluated() || ExprEvalCtx.isConstantEvaluated()) |
888 | return true; |
889 | |
890 | // FIXME: Is bailing out early correct here? Should we instead assume that |
891 | // the caller is a global initializer? |
892 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
893 | if (!Caller) |
894 | return true; |
895 | |
896 | // If the caller is known-emitted, mark the callee as known-emitted. |
897 | // Otherwise, mark the call in our call graph so we can traverse it later. |
  bool CallerKnownEmitted = SemaRef.getEmissionStatus(Caller) ==
899 | Sema::FunctionEmissionStatus::Emitted; |
900 | SemaDiagnosticBuilder::Kind DiagKind = [this, Caller, Callee, |
901 | CallerKnownEmitted] { |
902 | switch (IdentifyPreference(Caller, Callee)) { |
903 | case CFP_Never: |
904 | case CFP_WrongSide: |
      assert(Caller && "Never/wrongSide calls require a non-null caller");
906 | // If we know the caller will be emitted, we know this wrong-side call |
907 | // will be emitted, so it's an immediate error. Otherwise, defer the |
908 | // error until we know the caller is emitted. |
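      //
      // Illustrative example of a deferred wrong-side call (hypothetical
      // functions), compiling for the device:
      //   __host__ int h();
      //   __host__ __device__ int hd() { return h(); }
      // The call hd() -> h() is WrongSide and only becomes an error if hd()
      // is actually emitted for the device.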
909 | return CallerKnownEmitted |
910 | ? SemaDiagnosticBuilder::K_ImmediateWithCallStack |
911 | : SemaDiagnosticBuilder::K_Deferred; |
912 | default: |
913 | return SemaDiagnosticBuilder::K_Nop; |
914 | } |
915 | }(); |
916 | |
917 | if (DiagKind == SemaDiagnosticBuilder::K_Nop) { |
918 | // For -fgpu-rdc, keep track of external kernels used by host functions. |
919 | if (getLangOpts().CUDAIsDevice && getLangOpts().GPURelocatableDeviceCode && |
920 | Callee->hasAttr<CUDAGlobalAttr>() && !Callee->isDefined() && |
921 | (!Caller || (!Caller->getDescribedFunctionTemplate() && |
922 | getASTContext().GetGVALinkageForFunction(Caller) == |
923 | GVA_StrongExternal))) |
924 | getASTContext().CUDAExternalDeviceDeclODRUsedByHost.insert(Callee); |
925 | return true; |
926 | } |
927 | |
928 | // Avoid emitting this error twice for the same location. Using a hashtable |
929 | // like this is unfortunate, but because we must continue parsing as normal |
930 | // after encountering a deferred error, it's otherwise very tricky for us to |
931 | // ensure that we only emit this deferred error once. |
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
933 | return true; |
934 | |
935 | SemaDiagnosticBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, |
936 | SemaRef) |
937 | << llvm::to_underlying(IdentifyTarget(Callee)) << /*function*/ 0 << Callee |
938 | << llvm::to_underlying(IdentifyTarget(Caller)); |
939 | if (!Callee->getBuiltinID()) |
940 | SemaDiagnosticBuilder(DiagKind, Callee->getLocation(), |
941 | diag::note_previous_decl, Caller, SemaRef) |
942 | << Callee; |
943 | return DiagKind != SemaDiagnosticBuilder::K_Immediate && |
944 | DiagKind != SemaDiagnosticBuilder::K_ImmediateWithCallStack; |
945 | } |
946 | |
947 | // Check the wrong-sided reference capture of lambda for CUDA/HIP. |
948 | // A lambda function may capture a stack variable by reference when it is |
949 | // defined and uses the capture by reference when the lambda is called. When |
950 | // the capture and use happen on different sides, the capture is invalid and |
951 | // should be diagnosed. |
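//
// Illustrative example (hypothetical kernel and caller):
//   template <typename F> __global__ void kernel(F f) { f(); }
//   void host_fn() {
//     int x = 0;
//     kernel<<<1, 1>>>([&x] __device__() { x = 1; }); // capturing the host
//                                                     // stack variable x by
//                                                     // reference is
//                                                     // diagnosed (deferred)
//   }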
952 | void SemaCUDA::CheckLambdaCapture(CXXMethodDecl *Callee, |
953 | const sema::Capture &Capture) { |
954 | // In host compilation we only need to check lambda functions emitted on host |
955 | // side. In such lambda functions, a reference capture is invalid only |
  // if the lambda structure is populated by a device function or kernel and
  // then passed to and called by a host function. However, that is impossible,
958 | // since a device function or kernel can only call a device function, also a |
959 | // kernel cannot pass a lambda back to a host function since we cannot |
960 | // define a kernel argument type which can hold the lambda before the lambda |
961 | // itself is defined. |
962 | if (!getLangOpts().CUDAIsDevice) |
963 | return; |
964 | |
965 | // File-scope lambda can only do init captures for global variables, which |
966 | // results in passing by value for these global variables. |
967 | FunctionDecl *Caller = SemaRef.getCurFunctionDecl(/*AllowLambda=*/true); |
968 | if (!Caller) |
969 | return; |
970 | |
971 | // In device compilation, we only need to check lambda functions which are |
972 | // emitted on device side. For such lambdas, a reference capture is invalid |
973 | // only if the lambda structure is populated by a host function then passed |
974 | // to and called in a device function or kernel. |
975 | bool CalleeIsDevice = Callee->hasAttr<CUDADeviceAttr>(); |
976 | bool CallerIsHost = |
977 | !Caller->hasAttr<CUDAGlobalAttr>() && !Caller->hasAttr<CUDADeviceAttr>(); |
978 | bool ShouldCheck = CalleeIsDevice && CallerIsHost; |
979 | if (!ShouldCheck || !Capture.isReferenceCapture()) |
980 | return; |
981 | auto DiagKind = SemaDiagnosticBuilder::K_Deferred; |
982 | if (Capture.isVariableCapture() && !getLangOpts().HIPStdPar) { |
983 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
984 | diag::err_capture_bad_target, Callee, SemaRef) |
985 | << Capture.getVariable(); |
986 | } else if (Capture.isThisCapture()) { |
987 | // Capture of this pointer is allowed since this pointer may be pointing to |
988 | // managed memory which is accessible on both device and host sides. It only |
989 | // results in invalid memory access if this pointer points to memory not |
990 | // accessible on device side. |
991 | SemaDiagnosticBuilder(DiagKind, Capture.getLocation(), |
992 | diag::warn_maybe_capture_bad_target_this_ptr, Callee, |
993 | SemaRef); |
994 | } |
995 | } |
996 | |
997 | void SemaCUDA::SetLambdaAttrs(CXXMethodDecl *Method) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
999 | if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>()) |
1000 | return; |
1001 | Method->addAttr(CUDADeviceAttr::CreateImplicit(getASTContext())); |
1002 | Method->addAttr(CUDAHostAttr::CreateImplicit(getASTContext())); |
1003 | } |
1004 | |
1005 | void SemaCUDA::checkTargetOverload(FunctionDecl *NewFD, |
1006 | const LookupResult &Previous) { |
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyTarget(NewFD);
1009 | for (NamedDecl *OldND : Previous) { |
1010 | FunctionDecl *OldFD = OldND->getAsFunction(); |
1011 | if (!OldFD) |
1012 | continue; |
1013 | |
    CUDAFunctionTarget OldTarget = IdentifyTarget(OldFD);
1015 | // Don't allow HD and global functions to overload other functions with the |
1016 | // same signature. We allow overloading based on CUDA attributes so that |
1017 | // functions can have different implementations on the host and device, but |
1018 | // HD/global functions "exist" in some sense on both the host and device, so |
1019 | // should have the same implementation on both sides. |
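    //
    // Illustrative example (hypothetical functions):
    //   __host__ int f();
    //   __device__ int f();          // OK: target-based overloading
    //   __host__ __device__ int g();
    //   __device__ int g();          // rejected: an HD function may not
    //                                // overload a same-signature device
    //                                // function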
1020 | if (NewTarget != OldTarget && |
1021 | ((NewTarget == CUDAFunctionTarget::HostDevice && |
1022 | !(getLangOpts().OffloadImplicitHostDeviceTemplates && |
            isImplicitHostDeviceFunction(NewFD) &&
1024 | OldTarget == CUDAFunctionTarget::Device)) || |
1025 | (OldTarget == CUDAFunctionTarget::HostDevice && |
1026 | !(getLangOpts().OffloadImplicitHostDeviceTemplates && |
            isImplicitHostDeviceFunction(OldFD) &&
1028 | NewTarget == CUDAFunctionTarget::Device)) || |
1029 | (NewTarget == CUDAFunctionTarget::Global) || |
1030 | (OldTarget == CUDAFunctionTarget::Global)) && |
        !SemaRef.IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                            /* ConsiderCudaAttrs = */ false)) {
1033 | Diag(NewFD->getLocation(), diag::err_cuda_ovl_target) |
1034 | << llvm::to_underlying(NewTarget) << NewFD->getDeclName() |
1035 | << llvm::to_underlying(OldTarget) << OldFD; |
1036 | Diag(OldFD->getLocation(), diag::note_previous_declaration); |
1037 | NewFD->setInvalidDecl(); |
1038 | break; |
1039 | } |
1040 | } |
1041 | } |
1042 | |
1043 | template <typename AttrTy> |
1044 | static void copyAttrIfPresent(Sema &S, FunctionDecl *FD, |
1045 | const FunctionDecl &TemplateFD) { |
1046 | if (AttrTy *Attribute = TemplateFD.getAttr<AttrTy>()) { |
1047 | AttrTy *Clone = Attribute->clone(S.Context); |
1048 | Clone->setInherited(true); |
    FD->addAttr(Clone);
1050 | } |
1051 | } |
1052 | |
1053 | void SemaCUDA::inheritTargetAttrs(FunctionDecl *FD, |
1054 | const FunctionTemplateDecl &TD) { |
1055 | const FunctionDecl &TemplateFD = *TD.getTemplatedDecl(); |
1056 | copyAttrIfPresent<CUDAGlobalAttr>(SemaRef, FD, TemplateFD); |
1057 | copyAttrIfPresent<CUDAHostAttr>(SemaRef, FD, TemplateFD); |
1058 | copyAttrIfPresent<CUDADeviceAttr>(SemaRef, FD, TemplateFD); |
1059 | } |
1060 | |
1061 | std::string SemaCUDA::getConfigureFuncName() const { |
1062 | if (getLangOpts().HIP) |
1063 | return getLangOpts().HIPUseNewLaunchAPI ? "__hipPushCallConfiguration" |
                                            : "hipConfigureCall";
1065 | |
1066 | // New CUDA kernel launch sequence. |
1067 | if (CudaFeatureEnabled(getASTContext().getTargetInfo().getSDKVersion(), |
1068 | CudaFeature::CUDA_USES_NEW_LAUNCH)) |
    return "__cudaPushCallConfiguration";
1070 | |
1071 | // Legacy CUDA kernel configuration call |
  return "cudaConfigureCall";
1073 | } |
1074 | |